diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 0e093a834..b618e1e65 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -15,13 +15,17 @@ jobs: matrix: options: - os: ubuntu-latest + framework: netcoreapp3.1 + runtime: -x64 + codecov: true + - os: macos-latest framework: netcoreapp3.1 runtime: -x64 codecov: false - os: windows-latest framework: netcoreapp3.1 runtime: -x64 - codecov: true + codecov: false - os: windows-latest framework: netcoreapp2.1 runtime: -x64 diff --git a/.runsettings b/.runsettings new file mode 100644 index 000000000..ca48342bd --- /dev/null +++ b/.runsettings @@ -0,0 +1,7 @@ + + + + + category!=failing + + diff --git a/Directory.Build.props b/Directory.Build.props index 0f9c5bdde..bb97810a8 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -15,6 +15,7 @@ $(MSBuildThisFileDirectory)artifacts/ $(SixLaborsProjectCategory)/$(MSBuildProjectName) https://github.com/SixLabors/ImageSharp/ + $(MSBuildThisFileDirectory)/.runsettings @@ -120,6 +121,7 @@ https://api.nuget.org/v3/index.json; https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-eng/nuget/v3/index.json; + https://www.myget.org/F/coverlet-dev/api/v3/index.json; true $(MSBuildThisFileDirectory)shared-infrastructure/SixLabors.snk diff --git a/Directory.Build.targets b/Directory.Build.targets index 4e7ab9e6b..2a7d25b97 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -18,22 +18,18 @@ - + - + - - + diff --git a/ImageSharp.sln b/ImageSharp.sln index 3ebc9453f..a8e0fd330 100644 --- a/ImageSharp.sln +++ b/ImageSharp.sln @@ -8,6 +8,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution .gitattributes = .gitattributes .gitignore = .gitignore .gitmodules = .gitmodules + .runsettings = .runsettings ci-build.ps1 = ci-build.ps1 ci-pack.ps1 = ci-pack.ps1 ci-test.ps1 = ci-test.ps1 diff --git a/src/Directory.Build.targets 
b/src/Directory.Build.targets index d1875262d..9b8be05b5 100644 --- a/src/Directory.Build.targets +++ b/src/Directory.Build.targets @@ -21,16 +21,25 @@ - + + $([System.IO.Path]::Combine('$(IntermediateOutputPath)','$(TargetFrameworkMoniker).AssemblyAttributes$(DefaultLanguageSourceExtension)')) + + + + + + + + + DependsOnTargets="InitializeSourceRootMappedPaths" + Returns="@(_LocalTopLevelSourceRoot)" + Condition="'$(DeterministicSourcePaths)' == 'true'"> <_LocalTopLevelSourceRoot Include="@(SourceRoot)" Condition="'%(SourceRoot.NestedRoot)' == ''"/> - + false @@ -62,7 +71,7 @@ - + @@ -74,7 +83,7 @@ SkipUnchangedFiles = "true" DestinationFolder="..\..\" /> - + - + diff --git a/src/ImageSharp/Common/Helpers/ImageMaths.cs b/src/ImageSharp/Common/Helpers/ImageMaths.cs index 977432f8b..d24230fe1 100644 --- a/src/ImageSharp/Common/Helpers/ImageMaths.cs +++ b/src/ImageSharp/Common/Helpers/ImageMaths.cs @@ -132,6 +132,12 @@ namespace SixLabors.ImageSharp return (a / GreatestCommonDivisor(a, b)) * b; } + /// + /// Calculates % 2 + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static int Modulo2(int x) => x & 1; + /// /// Calculates % 4 /// diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs new file mode 100644 index 000000000..7687a5b95 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -0,0 +1,193 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Buffers.Binary; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// The JIT can detect and optimize rotation idioms ROTL (Rotate Left) +// and ROTR (Rotate Right) emitting efficient CPU instructions: +// https://github.com/dotnet/coreclr/pull/1830 +namespace SixLabors.ImageSharp +{ + /// + /// Defines the contract for methods that allow the shuffling of pixel components. 
+ /// Used for shuffling on platforms that do not support Hardware Intrinsics. + /// + internal interface IComponentShuffle + { + /// + /// Gets the shuffle control. + /// + byte Control { get; } + + /// + /// Shuffle 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + void RunFallbackShuffle(ReadOnlySpan source, Span dest); + } + + /// + internal interface IShuffle4 : IComponentShuffle + { + } + + internal readonly struct DefaultShuffle4 : IShuffle4 + { + private readonly byte p3; + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) + { + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + + this.p3 = p3; + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p3 = this.p3; + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + for (int i = 0; i < source.Length; i += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); + } + } + } + + internal readonly struct WXYZShuffle4 : IShuffle4 + { + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); + } + + 
[MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; + + for (int i = 0; i < n; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTL(8, packed) = [Z Y X W] + Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24); + } + } + } + + internal readonly struct WZYXShuffle4 : IShuffle4 + { + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; + + for (int i = 0; i < n; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // REVERSE(packedArgb) = [X Y Z W] + Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed); + } + } + } + + internal readonly struct YZWXShuffle4 : IShuffle4 + { + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; + + for (int i = 0; i < n; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTR(8, packedArgb) = [Y Z W X] + Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24); + } + } + } + + internal readonly struct ZYXWShuffle4 : IShuffle4 + { + public byte Control + { + 
[MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; + + for (int i = 0; i < n; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // tmp1 = [W 0 Y 0] + // tmp2 = [0 Z 0 X] + // tmp3=ROTL(16, tmp2) = [0 X 0 Z] + // tmp1 + tmp3 = [W X Y Z] + uint tmp1 = packed & 0xFF00FF00; + uint tmp2 = packed & 0x00FF00FF; + uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); + + Unsafe.Add(ref dBase, i) = tmp1 + tmp3; + } + } + } +} diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs new file mode 100644 index 000000000..0c2b1d508 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -0,0 +1,103 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace SixLabors.ImageSharp
{
    /// <inheritdoc/>
    internal interface IPad3Shuffle4 : IComponentShuffle
    {
    }

    /// <summary>
    /// General-purpose "pad then shuffle": every 3-byte source triplet is extended to
    /// four bytes with 0xFF as the fourth byte, and the four bytes are then written to
    /// the destination in the order selected by p0..p3.
    /// </summary>
    internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4
    {
        private readonly byte p3;
        private readonly byte p2;
        private readonly byte p1;
        private readonly byte p0;

        /// <summary>
        /// Initializes a new instance of the <see cref="DefaultPad3Shuffle4"/> struct.
        /// Each argument is an index in the range 0..3 into the padded 4-byte group
        /// (checked via DebugGuard); index 3 selects the 0xFF padding byte.
        /// </summary>
        /// <param name="p3">Index written to destination byte 3.</param>
        /// <param name="p2">Index written to destination byte 2.</param>
        /// <param name="p1">Index written to destination byte 1.</param>
        /// <param name="p0">Index written to destination byte 0.</param>
        public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0)
        {
            DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
            DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
            DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
            DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));

            this.p3 = p3;
            this.p2 = p2;
            this.p1 = p1;
            this.p0 = p0;
            this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
        }

        /// <inheritdoc/>
        public byte Control { get; }

        /// <inheritdoc/>
        [MethodImpl(InliningOptions.ShortMethod)]
        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
        {
            ref byte sBase = ref MemoryMarshal.GetReference(source);
            ref byte dBase = ref MemoryMarshal.GetReference(dest);

            int p3 = this.p3;
            int p2 = this.p2;
            int p1 = this.p1;
            int p0 = this.p0;

            // Scratch group laid out as [s0, s1, s2, 0xFF]. The padding byte is written
            // once; only the first three bytes change per iteration.
            Span<byte> temp = stackalloc byte[4];
            ref byte t = ref MemoryMarshal.GetReference(temp);
            Unsafe.Add(ref t, 3) = byte.MaxValue;

            // The loop steps source in triplets and dest in groups of four without bounds
            // checks, so source.Length is expected to be a multiple of 3 and dest to hold
            // 4 bytes per triplet.
            for (int i = 0, j = 0; i < source.Length; i += 3, j += 4)
            {
                ref byte s = ref Unsafe.Add(ref sBase, i);

                // Copy exactly three bytes. Reading the triplet as a 4-byte uint would
                // touch one byte past the end of source on the final iteration and would
                // also tie the padding position to the machine's endianness.
                Unsafe.Add(ref t, 0) = Unsafe.Add(ref s, 0);
                Unsafe.Add(ref t, 1) = Unsafe.Add(ref s, 1);
                Unsafe.Add(ref t, 2) = Unsafe.Add(ref s, 2);

                Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0);
                Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref t, p1);
                Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref t, p2);
                Unsafe.Add(ref dBase, j + 3) = Unsafe.Add(ref t, p3);
            }
        }
    }

    /// <summary>
    /// Specialised "pad then shuffle" that keeps the three source bytes in order and
    /// appends 0xFF as the fourth byte. Control is <c>MmShuffle(3, 2, 1, 0)</c>.
    /// </summary>
    internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4
    {
        /// <inheritdoc/>
        public byte Control
        {
            [MethodImpl(InliningOptions.ShortMethod)]
            get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0);
        }

        /// <inheritdoc/>
        [MethodImpl(InliningOptions.ShortMethod)]
        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
        {
            ref byte sBase = ref MemoryMarshal.GetReference(source);
            ref byte dBase = ref MemoryMarshal.GetReference(dest);

            ref byte sEnd = ref Unsafe.Add(ref sBase, source.Length);

            // Stop the wide loop 4 bytes short of the end so that its 4-byte read of a
            // 3-byte triplet never crosses the end of source.
            ref byte sLoopEnd = ref Unsafe.Subtract(ref sEnd, 4);

            while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd))
            {
                // Read 4 bytes, force the top byte of the uint to 0xFF, write 4 bytes.
                // NOTE(review): placing the padding in the 4th byte relies on little-endian layout.
                Unsafe.As<byte, uint>(ref dBase) = Unsafe.As<byte, uint>(ref sBase) | 0xFF000000;

                sBase = ref Unsafe.Add(ref sBase, 3);
                dBase = ref Unsafe.Add(ref dBase, 4);
            }

            // Remaining triplets are copied byte by byte.
            while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd))
            {
                Unsafe.Add(ref dBase, 0) = Unsafe.Add(ref sBase, 0);
                Unsafe.Add(ref dBase, 1) = Unsafe.Add(ref sBase, 1);
                Unsafe.Add(ref dBase, 2) = Unsafe.Add(ref sBase, 2);
                Unsafe.Add(ref dBase, 3) = byte.MaxValue;

                sBase = ref Unsafe.Add(ref sBase, 3);
                dBase = ref Unsafe.Add(ref dBase, 4);
            }
        }
    }
}

// ---- patch boundary: new file src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs ----
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace SixLabors.ImageSharp
{
    /// <inheritdoc/>
    internal interface IShuffle3 : IComponentShuffle
    {
    }

    /// <summary>
    /// General-purpose 3-component shuffle: for every group of three source bytes,
    /// destination byte <c>k</c> is taken from source byte <c>p{k}</c> of the same group.
    /// </summary>
    internal readonly struct DefaultShuffle3 : IShuffle3
    {
        private readonly byte p2;
        private readonly byte p1;
        private readonly byte p0;

        /// <summary>
        /// Initializes a new instance of the <see cref="DefaultShuffle3"/> struct.
        /// Each argument is a source index in the range 0..2 (checked via DebugGuard).
        /// </summary>
        /// <param name="p2">Source index written to destination byte 2.</param>
        /// <param name="p1">Source index written to destination byte 1.</param>
        /// <param name="p0">Source index written to destination byte 0.</param>
        public DefaultShuffle3(byte p2, byte p1, byte p0)
        {
            DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 2, nameof(p2));
            DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 2, nameof(p1));
            DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 2, nameof(p0));

            this.p2 = p2;
            this.p1 = p1;
            this.p0 = p0;

            // The control always carries 3 in the highest position; only p2..p0 vary.
            this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0);
        }

        /// <inheritdoc/>
        public byte Control { get; }

        /// <inheritdoc/>
        [MethodImpl(InliningOptions.ShortMethod)]
        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
        {
            ref byte sBase = ref MemoryMarshal.GetReference(source);
            ref byte dBase = ref MemoryMarshal.GetReference(dest);

            int p2 = this.p2;
            int p1 = this.p1;
            int p0 = this.p0;

            // No bounds checks here: the loop steps in triplets and indexes up to i + 2 in
            // both spans, so source.Length is expected to be a multiple of 3 and dest at
            // least as long as source.
            for (int i = 0; i < source.Length; i += 3)
            {
                Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i);
                Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
                Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
            }
        }
    }
}

// ---- patch boundary: new file src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs ----
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
+ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + internal interface IShuffle4Slice3 : IComponentShuffle + { + } + + internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 + { + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0) + { + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + j); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + j); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + j); + } + } + } + + internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3 + { + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / 4; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + ref uint sLoopEnd = ref Unsafe.Add(ref sBase, u); + ref uint sEnd = ref 
Unsafe.Add(ref sBase, n); + + while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd)) + { + Unsafe.Add(ref dBase, 0) = Unsafe.As(ref Unsafe.Add(ref sBase, 0)); + Unsafe.Add(ref dBase, 1) = Unsafe.As(ref Unsafe.Add(ref sBase, 1)); + Unsafe.Add(ref dBase, 2) = Unsafe.As(ref Unsafe.Add(ref sBase, 2)); + Unsafe.Add(ref dBase, 3) = Unsafe.As(ref Unsafe.Add(ref sBase, 3)); + + sBase = ref Unsafe.Add(ref sBase, 4); + dBase = ref Unsafe.Add(ref dBase, 4); + } + + while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd)) + { + Unsafe.Add(ref dBase, 0) = Unsafe.As(ref Unsafe.Add(ref sBase, 0)); + + sBase = ref Unsafe.Add(ref sBase, 1); + dBase = ref Unsafe.Add(ref dBase, 1); + } + } + } + + [StructLayout(LayoutKind.Explicit, Size = 3)] + internal readonly struct Byte3 + { + } +} diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs deleted file mode 100644 index b56c92dab..000000000 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -#if SUPPORTS_RUNTIME_INTRINSICS - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; - -namespace SixLabors.ImageSharp -{ - internal static partial class SimdUtils - { - public static class Avx2Intrinsics - { - private static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; - - /// - /// as many elements as possible, slicing them down (keeping the remainder). 
- /// - [MethodImpl(InliningOptions.ShortMethod)] - internal static void NormalizedFloatToByteSaturateReduce( - ref ReadOnlySpan source, - ref Span dest) - { - DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); - - if (Avx2.IsSupported) - { - int remainder = ImageMaths.ModuloP2(source.Length, Vector.Count); - int adjustedCount = source.Length - remainder; - - if (adjustedCount > 0) - { - NormalizedFloatToByteSaturate( - source.Slice(0, adjustedCount), - dest.Slice(0, adjustedCount)); - - source = source.Slice(adjustedCount); - dest = dest.Slice(adjustedCount); - } - } - } - - /// - /// Implementation of , which is faster on new .NET runtime. - /// - /// - /// Implementation is based on MagicScaler code: - /// https://github.com/saucecontrol/PhotoSauce/blob/a9bd6e5162d2160419f0cf743fd4f536c079170b/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L453-L477 - /// - internal static void NormalizedFloatToByteSaturate( - ReadOnlySpan source, - Span dest) - { - VerifySpanInput(source, dest, Vector256.Count); - - int n = dest.Length / Vector256.Count; - - ref Vector256 sourceBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - ref Vector256 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - - var maxBytes = Vector256.Create(255f); - ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32); - Vector256 mask = Unsafe.As>(ref maskBase); - - for (int i = 0; i < n; i++) - { - ref Vector256 s = ref Unsafe.Add(ref sourceBase, i * 4); - - Vector256 f0 = s; - Vector256 f1 = Unsafe.Add(ref s, 1); - Vector256 f2 = Unsafe.Add(ref s, 2); - Vector256 f3 = Unsafe.Add(ref s, 3); - - Vector256 w0 = ConvertToInt32(f0, maxBytes); - Vector256 w1 = ConvertToInt32(f1, maxBytes); - Vector256 w2 = ConvertToInt32(f2, maxBytes); - Vector256 w3 = ConvertToInt32(f3, maxBytes); - - Vector256 u0 = Avx2.PackSignedSaturate(w0, w1); - Vector256 u1 = Avx2.PackSignedSaturate(w2, w3); - Vector256 b = 
Avx2.PackUnsignedSaturate(u0, u1); - b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte(); - - Unsafe.Add(ref destBase, i) = b; - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 ConvertToInt32(Vector256 vf, Vector256 scale) - { - vf = Avx.Multiply(vf, scale); - return Avx.ConvertToVector256Int32(vf); - } - } - } -} -#endif diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs new file mode 100644 index 000000000..2ea7f2c9b --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -0,0 +1,795 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +#if SUPPORTS_RUNTIME_INTRINSICS +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + public static class HwIntrinsics + { + public static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; + + public static ReadOnlySpan PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 }; + + private static ReadOnlySpan ShuffleMaskPad4Nx16 => new byte[] { 0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80 }; + + private static ReadOnlySpan ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 }; + + /// + /// Shuffle single-precision (32-bit) floating-point elements in + /// using the control and store the results in . + /// + /// The source span of floats. + /// The destination span of floats. + /// The byte control. 
+ [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Avx.IsSupported || Sse.IsSupported) + { + int remainder = Avx.IsSupported + ? ImageMaths.ModuloP2(source.Length, Vector256.Count) + : ImageMaths.ModuloP2(source.Length, Vector128.Count); + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + Shuffle4( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Shuffle 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Avx2.IsSupported || Ssse3.IsSupported) + { + int remainder = Avx2.IsSupported + ? ImageMaths.ModuloP2(source.Length, Vector256.Count) + : ImageMaths.ModuloP2(source.Length, Vector128.Count); + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + Shuffle4( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Shuffles 8-bit integer triplets within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. 
+ [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle3Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = source.Length % (Vector128.Count * 3); + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + Shuffle3( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Pads then shuffles 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Pad3Shuffle4Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = source.Length % (Vector128.Count * 3); + + int sourceCount = source.Length - remainder; + int destCount = sourceCount * 4 / 3; + + if (sourceCount > 0) + { + Pad3Shuffle4( + source.Slice(0, sourceCount), + dest.Slice(0, destCount), + control); + + source = source.Slice(sourceCount); + dest = dest.Slice(destCount); + } + } + } + + /// + /// Shuffles then slices 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. 
+ [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Slice3Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = source.Length % (Vector128.Count * 4); + + int sourceCount = source.Length - remainder; + int destCount = sourceCount * 3 / 4; + + if (sourceCount > 0) + { + Shuffle4Slice3( + source.Slice(0, sourceCount), + dest.Slice(0, destCount), + control); + + source = source.Slice(sourceCount); + dest = dest.Slice(destCount); + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle4( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Avx.IsSupported) + { + ref Vector256 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = dest.Length / Vector256.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) + { + ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); + ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); + + vd0 = Avx.Permute(vs0, control); + Unsafe.Add(ref vd0, 1) = Avx.Permute(Unsafe.Add(ref vs0, 1), control); + Unsafe.Add(ref vd0, 2) = Avx.Permute(Unsafe.Add(ref vs0, 2), control); + Unsafe.Add(ref vd0, 3) = Avx.Permute(Unsafe.Add(ref vs0, 3), control); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control); + } + } + } + else + { + // Sse + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = dest.Length / Vector128.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) + { + ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); + ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); + + vd0 = Sse.Shuffle(vs0, vs0, control); + + 
Vector128 vs1 = Unsafe.Add(ref vs0, 1); + Unsafe.Add(ref vd0, 1) = Sse.Shuffle(vs1, vs1, control); + + Vector128 vs2 = Unsafe.Add(ref vs0, 2); + Unsafe.Add(ref vd0, 2) = Sse.Shuffle(vs2, vs2, control); + + Vector128 vs3 = Unsafe.Add(ref vs0, 3); + Unsafe.Add(ref vd0, 3) = Sse.Shuffle(vs3, vs3, control); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Vector128 vs = Unsafe.Add(ref sourceBase, i); + Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control); + } + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle4( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Avx2.IsSupported) + { + // I've chosen to do this for convenience while we determine what + // shuffle controls to add to the library. + // We can add static ROS instances if need be in the future. + Span bytes = stackalloc byte[Vector256.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector256 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector256 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = dest.Length / Vector256.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) + { + ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); + + vd0 = Avx2.Shuffle(vs0, vshuffle); + Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle); + Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle); + Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); + } + } + } + else + { + // Ssse3 + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref 
MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = dest.Length / Vector128.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) + { + ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); + + vd0 = Ssse3.Shuffle(vs0, vshuffle); + Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle); + Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle); + Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); + } + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle3( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16); + Vector128 vmask = Unsafe.As>(ref vmaskBase); + ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16); + Vector128 vmasko = Unsafe.As>(ref vmaskoBase); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0; i < n; i += 3) + { + ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i); + + Vector128 v0 = vs; + Vector128 v1 = Unsafe.Add(ref vs, 1); + Vector128 v2 = Unsafe.Add(ref vs, 2); + Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 
4); + + v2 = Ssse3.AlignRight(v2, v1, 8); + v1 = Ssse3.AlignRight(v1, v0, 12); + + v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vmask), vshuffle); + v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vmask), vshuffle); + v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vmask), vshuffle); + v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vmask), vshuffle); + + v0 = Ssse3.Shuffle(v0, vmaske); + v1 = Ssse3.Shuffle(v1, vmasko); + v2 = Ssse3.Shuffle(v2, vmaske); + v3 = Ssse3.Shuffle(v3, vmasko); + + v0 = Ssse3.AlignRight(v1, v0, 4); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, i); + + vd = v0; + Unsafe.Add(ref vd, 1) = v1; + Unsafe.Add(ref vd, 2) = v3; + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Pad3Shuffle4( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16); + Vector128 vmask = Unsafe.As>(ref vmaskBase); + Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0, j = 0; i < n; i += 3, j += 4) + { + ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); + Vector128 v1 = Unsafe.Add(ref v0, 1); + Vector128 v2 = Unsafe.Add(ref v0, 2); + Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v2 = Ssse3.AlignRight(v2, v1, 8); + v1 = Ssse3.AlignRight(v1, v0, 12); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, j); + + vd = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v0, vmask), vfill), 
vshuffle); + Unsafe.Add(ref vd, 1) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v1, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 2) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v2, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 3) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v3, vmask), vfill), vshuffle); + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle4Slice3( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16); + Vector128 vmasko = Unsafe.As>(ref vmaskoBase); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0, j = 0; i < n; i += 4, j += 3) + { + ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i); + + Vector128 v0 = vs; + Vector128 v1 = Unsafe.Add(ref vs, 1); + Vector128 v2 = Unsafe.Add(ref vs, 2); + Vector128 v3 = Unsafe.Add(ref vs, 3); + + v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vshuffle), vmaske); + v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vshuffle), vmasko); + v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vshuffle), vmaske); + v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vshuffle), vmasko); + + v0 = Ssse3.AlignRight(v1, v0, 4); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, j); + + vd = v0; + Unsafe.Add(ref vd, 1) = v1; + Unsafe.Add(ref vd, 2) = v3; + } + } + } + + /// + /// Performs a multiplication and an addition of the . 
+ /// + /// The vector to add to the intermediate result. + /// The first vector to multiply. + /// The second vector to multiply. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplyAdd( + in Vector256 va, + in Vector256 vm0, + in Vector256 vm1) + { + if (Fma.IsSupported) + { + return Fma.MultiplyAdd(vm1, vm0, va); + } + else + { + return Avx.Add(Avx.Multiply(vm0, vm1), va); + } + } + + /// + /// as many elements as possible, slicing them down (keeping the remainder). + /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void ByteToNormalizedFloatReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + if (Avx2.IsSupported || Sse2.IsSupported) + { + int remainder; + if (Avx2.IsSupported) + { + remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); + } + else + { + remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + } + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + ByteToNormalizedFloat(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Implementation , which is faster on new RyuJIT runtime. 
+ /// + /// + /// Implementation is based on MagicScaler code: + /// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L80-L182 + /// + internal static unsafe void ByteToNormalizedFloat( + ReadOnlySpan source, + Span dest) + { + if (Avx2.IsSupported) + { + VerifySpanInput(source, dest, Vector256.Count); + + int n = dest.Length / Vector256.Count; + + byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector256.Create(1 / (float)byte.MaxValue); + + for (int i = 0; i < n; i++) + { + int si = Vector256.Count * i; + Vector256 i0 = Avx2.ConvertToVector256Int32(sourceBase + si); + Vector256 i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256.Count); + Vector256 i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256.Count * 2)); + Vector256 i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256.Count * 3)); + + Vector256 f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0)); + Vector256 f1 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i1)); + Vector256 f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2)); + Vector256 f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3)); + + ref Vector256 d = ref Unsafe.Add(ref destBase, i * 4); + + d = f0; + Unsafe.Add(ref d, 1) = f1; + Unsafe.Add(ref d, 2) = f2; + Unsafe.Add(ref d, 3) = f3; + } + } + else + { + // Sse + VerifySpanInput(source, dest, Vector128.Count); + + int n = dest.Length / Vector128.Count; + + byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector128.Create(1 / (float)byte.MaxValue); + Vector128 zero = Vector128.Zero; + + for (int i = 0; i < n; i++) + { + int si = Vector128.Count * i; + + Vector128 i0, i1, i2, i3; + 
if (Sse41.IsSupported) + { + i0 = Sse41.ConvertToVector128Int32(sourceBase + si); + i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128.Count); + i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128.Count * 2)); + i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128.Count * 3)); + } + else + { + Vector128 b = Sse2.LoadVector128(sourceBase + si); + Vector128 s0 = Sse2.UnpackLow(b, zero).AsInt16(); + Vector128 s1 = Sse2.UnpackHigh(b, zero).AsInt16(); + + i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32(); + i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32(); + i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32(); + i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32(); + } + + Vector128 f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0)); + Vector128 f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1)); + Vector128 f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2)); + Vector128 f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3)); + + ref Vector128 d = ref Unsafe.Add(ref destBase, i * 4); + + d = f0; + Unsafe.Add(ref d, 1) = f1; + Unsafe.Add(ref d, 2) = f2; + Unsafe.Add(ref d, 3) = f3; + } + } + } + + /// + /// as many elements as possible, slicing them down (keeping the remainder). 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void NormalizedFloatToByteSaturateReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + if (Avx2.IsSupported || Sse2.IsSupported) + { + int remainder; + if (Avx2.IsSupported) + { + remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); + } + else + { + remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + } + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + NormalizedFloatToByteSaturate( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Implementation of , which is faster on new .NET runtime. + /// + /// + /// Implementation is based on MagicScaler code: + /// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L541-L622 + /// + internal static void NormalizedFloatToByteSaturate( + ReadOnlySpan source, + Span dest) + { + if (Avx2.IsSupported) + { + VerifySpanInput(source, dest, Vector256.Count); + + int n = dest.Length / Vector256.Count; + + ref Vector256 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector256.Create((float)byte.MaxValue); + ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32); + Vector256 mask = Unsafe.As>(ref maskBase); + + for (int i = 0; i < n; i++) + { + ref Vector256 s = ref Unsafe.Add(ref sourceBase, i * 4); + + Vector256 f0 = Avx.Multiply(scale, s); + Vector256 f1 = Avx.Multiply(scale, Unsafe.Add(ref s, 1)); + Vector256 f2 = Avx.Multiply(scale, Unsafe.Add(ref s, 2)); + Vector256 f3 = Avx.Multiply(scale, Unsafe.Add(ref s, 3)); + + Vector256 w0 = 
Avx.ConvertToVector256Int32(f0); + Vector256 w1 = Avx.ConvertToVector256Int32(f1); + Vector256 w2 = Avx.ConvertToVector256Int32(f2); + Vector256 w3 = Avx.ConvertToVector256Int32(f3); + + Vector256 u0 = Avx2.PackSignedSaturate(w0, w1); + Vector256 u1 = Avx2.PackSignedSaturate(w2, w3); + Vector256 b = Avx2.PackUnsignedSaturate(u0, u1); + b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte(); + + Unsafe.Add(ref destBase, i) = b; + } + } + else + { + // Sse + VerifySpanInput(source, dest, Vector128.Count); + + int n = dest.Length / Vector128.Count; + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector128.Create((float)byte.MaxValue); + + for (int i = 0; i < n; i++) + { + ref Vector128 s = ref Unsafe.Add(ref sourceBase, i * 4); + + Vector128 f0 = Sse.Multiply(scale, s); + Vector128 f1 = Sse.Multiply(scale, Unsafe.Add(ref s, 1)); + Vector128 f2 = Sse.Multiply(scale, Unsafe.Add(ref s, 2)); + Vector128 f3 = Sse.Multiply(scale, Unsafe.Add(ref s, 3)); + + Vector128 w0 = Sse2.ConvertToVector128Int32(f0); + Vector128 w1 = Sse2.ConvertToVector128Int32(f1); + Vector128 w2 = Sse2.ConvertToVector128Int32(f2); + Vector128 w3 = Sse2.ConvertToVector128Int32(f3); + + Vector128 u0 = Sse2.PackSignedSaturate(w0, w1); + Vector128 u1 = Sse2.PackSignedSaturate(w2, w3); + + Unsafe.Add(ref destBase, i) = Sse2.PackUnsignedSaturate(u0, u1); + } + } + } + } + } +} +#endif diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs new file mode 100644 index 000000000..07744566a --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -0,0 +1,275 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + /// + /// Shuffle single-precision (32-bit) floating-point elements in + /// using the control and store the results in . + /// + /// The source span of floats. + /// The destination span of floats. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4( + ReadOnlySpan source, + Span dest, + byte control) + { + VerifyShuffle4SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + Shuffle4Remainder(source, dest, control); + } + } + + /// + /// Shuffle 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4( + ReadOnlySpan source, + Span dest, + TShuffle shuffle) + where TShuffle : struct, IShuffle4 + { + VerifyShuffle4SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + shuffle.RunFallbackShuffle(source, dest); + } + } + + /// + /// Shuffle 8-bit integer triplets within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. 
+ [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle3( + ReadOnlySpan source, + Span dest, + TShuffle shuffle) + where TShuffle : struct, IShuffle3 + { + VerifyShuffle3SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle3Reduce(ref source, ref dest, shuffle.Control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + shuffle.RunFallbackShuffle(source, dest); + } + } + + /// + /// Pads then shuffles 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Pad3Shuffle4( + ReadOnlySpan source, + Span dest, + TShuffle shuffle) + where TShuffle : struct, IPad3Shuffle4 + { + VerifyPad3Shuffle4SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, shuffle.Control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + shuffle.RunFallbackShuffle(source, dest); + } + } + + /// + /// Shuffles then slices 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. 
+ [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Slice3( + ReadOnlySpan source, + Span dest, + TShuffle shuffle) + where TShuffle : struct, IShuffle4Slice3 + { + VerifyShuffle4Slice3SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, shuffle.Control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + shuffle.RunFallbackShuffle(source, dest); + } + } + + private static void Shuffle4Remainder( + ReadOnlySpan source, + Span dest, + byte control) + { + ref float sBase = ref MemoryMarshal.GetReference(source); + ref float dBase = ref MemoryMarshal.GetReference(dest); + Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); + + for (int i = 0; i < source.Length; i += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); + } + } + + [Conditional("DEBUG")] + private static void VerifyShuffle4SpanInput(ReadOnlySpan source, Span dest) + where T : struct + { + DebugGuard.IsTrue( + source.Length == dest.Length, + nameof(source), + "Input spans must be of same length!"); + + DebugGuard.IsTrue( + source.Length % 4 == 0, + nameof(source), + "Input spans must be divisable by 4!"); + } + + [Conditional("DEBUG")] + private static void VerifyShuffle3SpanInput(ReadOnlySpan source, Span dest) + where T : struct + { + DebugGuard.IsTrue( + source.Length == dest.Length, + nameof(source), + "Input spans must be of same length!"); + + DebugGuard.IsTrue( + source.Length % 3 == 0, + nameof(source), + "Input spans must be divisable by 3!"); + } + + [Conditional("DEBUG")] + private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan source, Span dest) + { + DebugGuard.IsTrue( + source.Length % 3 == 0, + nameof(source), + "Input span must be divisable by 
3!"); + + DebugGuard.IsTrue( + dest.Length % 4 == 0, + nameof(dest), + "Output span must be divisable by 4!"); + + DebugGuard.IsTrue( + source.Length == dest.Length * 3 / 4, + nameof(source), + "Input span must be 3/4 the length of the output span!"); + } + + [Conditional("DEBUG")] + private static void VerifyShuffle4Slice3SpanInput(ReadOnlySpan source, Span dest) + { + DebugGuard.IsTrue( + source.Length % 4 == 0, + nameof(source), + "Input span must be divisable by 4!"); + + DebugGuard.IsTrue( + dest.Length % 3 == 0, + nameof(dest), + "Output span must be divisable by 3!"); + + DebugGuard.IsTrue( + dest.Length >= source.Length * 3 / 4, + nameof(source), + "Output span must be at least 3/4 the length of the input span!"); + } + + public static class Shuffle + { + [MethodImpl(InliningOptions.ShortMethod)] + public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0) + => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); + + [MethodImpl(InliningOptions.ShortMethod)] + public static void MmShuffleSpan(ref Span span, byte control) + { + InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + ref byte spanBase = ref MemoryMarshal.GetReference(span); + + for (int i = 0; i < span.Length; i += 4) + { + Unsafe.Add(ref spanBase, i) = (byte)(p0 + i); + Unsafe.Add(ref spanBase, i + 1) = (byte)(p1 + i); + Unsafe.Add(ref spanBase, i + 2) = (byte)(p2 + i); + Unsafe.Add(ref spanBase, i + 3) = (byte)(p3 + i); + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + public static void InverseMmShuffle( + byte control, + out int p3, + out int p2, + out int p1, + out int p0) + { + p3 = control >> 6 & 0x3; + p2 = control >> 4 & 0x3; + p1 = control >> 2 & 0x3; + p0 = control >> 0 & 0x3; + } + } + } +} diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index 7f917648d..7cbb5bfe3 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -25,6 +25,25 @@ 
namespace SixLabors.ImageSharp public static bool HasVector8 { get; } = Vector.IsHardwareAccelerated && Vector.Count == 8 && Vector.Count == 8; + /// + /// Gets a value indicating whether code is being JIT-ed to SSE instructions + /// where float and integer registers are of size 128 bits. + /// + public static bool HasVector4 { get; } = + Vector.IsHardwareAccelerated && Vector.Count == 4; + + public static bool HasAvx2 + { + get + { +#if SUPPORTS_RUNTIME_INTRINSICS + return Avx2.IsSupported; +#else + return false; +#endif + } + } + /// /// Transform all scalars in 'v' in a way that converting them to would have rounding semantics. /// @@ -79,8 +98,9 @@ namespace SixLabors.ImageSharp internal static void ByteToNormalizedFloat(ReadOnlySpan source, Span dest) { DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); - -#if SUPPORTS_EXTENDED_INTRINSICS +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest); +#elif SUPPORTS_EXTENDED_INTRINSICS ExtendedIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest); #else BasicIntrinsics256.ByteToNormalizedFloatReduce(ref source, ref dest); @@ -110,7 +130,7 @@ namespace SixLabors.ImageSharp DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); #if SUPPORTS_RUNTIME_INTRINSICS - Avx2Intrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest); + HwIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest); #elif SUPPORTS_EXTENDED_INTRINSICS ExtendedIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest); #else diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs index fccc50755..f617e9a3e 100644 --- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs @@ -5,6 +5,10 @@ using System; using System.Numerics; using
System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp { @@ -13,6 +17,9 @@ namespace SixLabors.ImageSharp /// internal static class Vector4Utilities { + private const int BlendAlphaControl = 0b_10_00_10_00; + private const int ShuffleAlphaControl = 0b_11_11_11_11; + /// /// Restricts a vector between a minimum and a maximum value. /// 5x Faster then . @@ -56,13 +63,39 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public static void Premultiply(Span vectors) { - // TODO: This method can be AVX2 optimized using Vector - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported && vectors.Length >= 2) + { + ref Vector256 vectorsBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - for (int i = 0; i < vectors.Length; i++) + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); + + while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) + { + Vector256 source = vectorsBase; + Vector256 multiply = Avx.Shuffle(source, source, ShuffleAlphaControl); + vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); + vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); + } + + if (ImageMaths.Modulo2(vectors.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ Premultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1))); + } + } + else +#endif { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - Premultiply(ref v); + ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + + for (int i = 0; i < vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + Premultiply(ref v); + } } } @@ -73,13 +106,39 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public static void UnPremultiply(Span vectors) { - // TODO: This method can be AVX2 optimized using Vector - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported && vectors.Length >= 2) + { + ref Vector256 vectorsBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - for (int i = 0; i < vectors.Length; i++) + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); + + while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) + { + Vector256 source = vectorsBase; + Vector256 multiply = Avx.Shuffle(source, source, ShuffleAlphaControl); + vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); + vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); + } + + if (ImageMaths.Modulo2(vectors.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ UnPremultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1))); + } + } + else +#endif { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - UnPremultiply(ref v); + ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + + for (int i = 0; i < vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + UnPremultiply(ref v); + } } } diff --git a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs index eb29c4405..01bdbd1c0 100644 --- a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs +++ b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs @@ -6,7 +6,6 @@ using System.Buffers; using System.IO; using System.Runtime.InteropServices; using System.Threading; -using System.Threading.Tasks; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Common.Helpers; using SixLabors.ImageSharp.Memory; @@ -171,7 +170,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp var fileHeader = new BmpFileHeader( type: BmpConstants.TypeMarkers.Bitmap, - fileSize: BmpFileHeader.Size + infoHeaderSize + infoHeader.ImageSize, + fileSize: BmpFileHeader.Size + infoHeaderSize + colorPaletteSize + infoHeader.ImageSize, reserved: 0, offset: BmpFileHeader.Size + infoHeaderSize + colorPaletteSize); @@ -342,20 +341,11 @@ namespace SixLabors.ImageSharp.Formats.Bmp using IndexedImageFrame quantized = frameQuantizer.BuildPaletteAndQuantizeFrame(image, image.Bounds()); ReadOnlySpan quantizedColors = quantized.Palette.Span; - var color = default(Rgba32); - - // TODO: Use bulk conversion here for better perf - int idx = 0; - foreach (TPixel quantizedColor in quantizedColors) + PixelOperations.Instance.ToBgra32(this.configuration, quantizedColors, MemoryMarshal.Cast(colorPalette)); + Span colorPaletteAsUInt = MemoryMarshal.Cast(colorPalette); + for (int i = 0; i < colorPaletteAsUInt.Length; i++) { - quantizedColor.ToRgba32(ref color); - colorPalette[idx] = color.B; - colorPalette[idx + 1] = color.G; - colorPalette[idx + 2] = 
color.R; - - // Padding byte, always 0. - colorPalette[idx + 3] = 0; - idx += 4; + colorPaletteAsUInt[i] = colorPaletteAsUInt[i] & 0x00FFFFFF; // Padding byte, always 0. } stream.Write(colorPalette); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs index f6f590368..0efefc06b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs @@ -10,90 +10,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { internal partial struct Block8x8F { - /// - /// Transpose the block into the destination block. - /// - /// The destination block - [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeInto(ref Block8x8F d) - { - d.V0L.X = V0L.X; - d.V1L.X = V0L.Y; - d.V2L.X = V0L.Z; - d.V3L.X = V0L.W; - d.V4L.X = V0R.X; - d.V5L.X = V0R.Y; - d.V6L.X = V0R.Z; - d.V7L.X = V0R.W; - - d.V0L.Y = V1L.X; - d.V1L.Y = V1L.Y; - d.V2L.Y = V1L.Z; - d.V3L.Y = V1L.W; - d.V4L.Y = V1R.X; - d.V5L.Y = V1R.Y; - d.V6L.Y = V1R.Z; - d.V7L.Y = V1R.W; - - d.V0L.Z = V2L.X; - d.V1L.Z = V2L.Y; - d.V2L.Z = V2L.Z; - d.V3L.Z = V2L.W; - d.V4L.Z = V2R.X; - d.V5L.Z = V2R.Y; - d.V6L.Z = V2R.Z; - d.V7L.Z = V2R.W; - - d.V0L.W = V3L.X; - d.V1L.W = V3L.Y; - d.V2L.W = V3L.Z; - d.V3L.W = V3L.W; - d.V4L.W = V3R.X; - d.V5L.W = V3R.Y; - d.V6L.W = V3R.Z; - d.V7L.W = V3R.W; - - d.V0R.X = V4L.X; - d.V1R.X = V4L.Y; - d.V2R.X = V4L.Z; - d.V3R.X = V4L.W; - d.V4R.X = V4R.X; - d.V5R.X = V4R.Y; - d.V6R.X = V4R.Z; - d.V7R.X = V4R.W; - - d.V0R.Y = V5L.X; - d.V1R.Y = V5L.Y; - d.V2R.Y = V5L.Z; - d.V3R.Y = V5L.W; - d.V4R.Y = V5R.X; - d.V5R.Y = V5R.Y; - d.V6R.Y = V5R.Z; - d.V7R.Y = V5R.W; - - d.V0R.Z = V6L.X; - d.V1R.Z = V6L.Y; - d.V2R.Z = V6L.Z; - d.V3R.Z = V6L.W; - d.V4R.Z = V6R.X; - d.V5R.Z = V6R.Y; - d.V6R.Z = V6R.Z; - d.V7R.Z = V6R.W; - - d.V0R.W = V7L.X; - d.V1R.W = V7L.Y; - d.V2R.W = V7L.Z; - d.V3R.W = V7L.W; - d.V4R.W = V7R.X; - d.V5R.W = 
V7R.Y; - d.V6R.W = V7R.Z; - d.V7R.W = V7R.W; - } - /// /// Level shift by +maximum/2, clip to [0, maximum] /// - public void NormalizeColorsInplace(float maximum) + public void NormalizeColorsInPlace(float maximum) { var CMin4 = new Vector4(0F); var CMax4 = new Vector4(maximum); @@ -118,10 +38,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } /// - /// AVX2-only variant for executing and in one step. + /// AVX2-only variant for executing and in one step. /// [MethodImpl(InliningOptions.ShortMethod)] - public void NormalizeColorsAndRoundInplaceVector8(float maximum) + public void NormalizeColorsAndRoundInPlaceVector8(float maximum) { var off = new Vector(MathF.Ceiling(maximum / 2)); var max = new Vector(maximum); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt index 6ee054021..e5a62dc07 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt @@ -23,42 +23,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { internal partial struct Block8x8F { - /// - /// Transpose the block into the destination block. - /// - /// The destination block - [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeInto(ref Block8x8F d) - { - <# - PushIndent(" "); - - for (int i = 0; i < 8; i++) - { - char destCoord = coordz[i % 4]; - char destSide = (i / 4) % 2 == 0 ? 'L' : 'R'; - - for (int j = 0; j < 8; j++) - { - if(i > 0 && j == 0){ - WriteLine(""); - } - - char srcCoord = coordz[j % 4]; - char srcSide = (j / 4) % 2 == 0 ? 
'L' : 'R'; - - var expression = $"d.V{j}{destSide}.{destCoord} = V{i}{srcSide}.{srcCoord};\r\n"; - Write(expression); - } - } - PopIndent(); - #> - } - /// /// Level shift by +maximum/2, clip to [0, maximum] /// - public void NormalizeColorsInplace(float maximum) + public void NormalizeColorsInPlace(float maximum) { var CMin4 = new Vector4(0F); var CMax4 = new Vector4(maximum); @@ -81,10 +49,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } /// - /// AVX2-only variant for executing and in one step. + /// AVX2-only variant for executing and in one step. /// [MethodImpl(InliningOptions.ShortMethod)] - public void NormalizeColorsAndRoundInplaceVector8(float maximum) + public void NormalizeColorsAndRoundInPlaceVector8(float maximum) { var off = new Vector(MathF.Ceiling(maximum / 2)); var max = new Vector(maximum); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index b7835d670..0dbdadbeb 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -6,6 +6,10 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif using System.Text; // ReSharper disable InconsistentNaming @@ -277,73 +281,156 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// The value to multiply by. 
[MethodImpl(InliningOptions.ShortMethod)] - public void MultiplyInplace(float value) - { - this.V0L *= value; - this.V0R *= value; - this.V1L *= value; - this.V1R *= value; - this.V2L *= value; - this.V2R *= value; - this.V3L *= value; - this.V3R *= value; - this.V4L *= value; - this.V4R *= value; - this.V5L *= value; - this.V5R *= value; - this.V6L *= value; - this.V6R *= value; - this.V7L *= value; - this.V7R *= value; + public void MultiplyInPlace(float value) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + var valueVec = Vector256.Create(value); + Unsafe.As>(ref this.V0L) = Avx.Multiply(Unsafe.As>(ref this.V0L), valueVec); + Unsafe.As>(ref this.V1L) = Avx.Multiply(Unsafe.As>(ref this.V1L), valueVec); + Unsafe.As>(ref this.V2L) = Avx.Multiply(Unsafe.As>(ref this.V2L), valueVec); + Unsafe.As>(ref this.V3L) = Avx.Multiply(Unsafe.As>(ref this.V3L), valueVec); + Unsafe.As>(ref this.V4L) = Avx.Multiply(Unsafe.As>(ref this.V4L), valueVec); + Unsafe.As>(ref this.V5L) = Avx.Multiply(Unsafe.As>(ref this.V5L), valueVec); + Unsafe.As>(ref this.V6L) = Avx.Multiply(Unsafe.As>(ref this.V6L), valueVec); + Unsafe.As>(ref this.V7L) = Avx.Multiply(Unsafe.As>(ref this.V7L), valueVec); + } + else +#endif + { + var valueVec = new Vector4(value); + this.V0L *= valueVec; + this.V0R *= valueVec; + this.V1L *= valueVec; + this.V1R *= valueVec; + this.V2L *= valueVec; + this.V2R *= valueVec; + this.V3L *= valueVec; + this.V3R *= valueVec; + this.V4L *= valueVec; + this.V4R *= valueVec; + this.V5L *= valueVec; + this.V5R *= valueVec; + this.V6L *= valueVec; + this.V6R *= valueVec; + this.V7L *= valueVec; + this.V7R *= valueVec; + } } /// /// Multiply all elements of the block by the corresponding elements of 'other'. 
/// [MethodImpl(InliningOptions.ShortMethod)] - public void MultiplyInplace(ref Block8x8F other) - { - this.V0L *= other.V0L; - this.V0R *= other.V0R; - this.V1L *= other.V1L; - this.V1R *= other.V1R; - this.V2L *= other.V2L; - this.V2R *= other.V2R; - this.V3L *= other.V3L; - this.V3R *= other.V3R; - this.V4L *= other.V4L; - this.V4R *= other.V4R; - this.V5L *= other.V5L; - this.V5R *= other.V5R; - this.V6L *= other.V6L; - this.V6R *= other.V6R; - this.V7L *= other.V7L; - this.V7R *= other.V7R; + public unsafe void MultiplyInPlace(ref Block8x8F other) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + Unsafe.As>(ref this.V0L) + = Avx.Multiply( + Unsafe.As>(ref this.V0L), + Unsafe.As>(ref other.V0L)); + + Unsafe.As>(ref this.V1L) + = Avx.Multiply( + Unsafe.As>(ref this.V1L), + Unsafe.As>(ref other.V1L)); + + Unsafe.As>(ref this.V2L) + = Avx.Multiply( + Unsafe.As>(ref this.V2L), + Unsafe.As>(ref other.V2L)); + + Unsafe.As>(ref this.V3L) + = Avx.Multiply( + Unsafe.As>(ref this.V3L), + Unsafe.As>(ref other.V3L)); + + Unsafe.As>(ref this.V4L) + = Avx.Multiply( + Unsafe.As>(ref this.V4L), + Unsafe.As>(ref other.V4L)); + + Unsafe.As>(ref this.V5L) + = Avx.Multiply( + Unsafe.As>(ref this.V5L), + Unsafe.As>(ref other.V5L)); + + Unsafe.As>(ref this.V6L) + = Avx.Multiply( + Unsafe.As>(ref this.V6L), + Unsafe.As>(ref other.V6L)); + + Unsafe.As>(ref this.V7L) + = Avx.Multiply( + Unsafe.As>(ref this.V7L), + Unsafe.As>(ref other.V7L)); + } + else +#endif + { + this.V0L *= other.V0L; + this.V0R *= other.V0R; + this.V1L *= other.V1L; + this.V1R *= other.V1R; + this.V2L *= other.V2L; + this.V2R *= other.V2R; + this.V3L *= other.V3L; + this.V3R *= other.V3R; + this.V4L *= other.V4L; + this.V4R *= other.V4R; + this.V5L *= other.V5L; + this.V5R *= other.V5R; + this.V6L *= other.V6L; + this.V6R *= other.V6R; + this.V7L *= other.V7L; + this.V7R *= other.V7R; + } } /// /// Adds a vector to all elements of the block. /// - /// The added vector + /// The added vector. 
[MethodImpl(InliningOptions.ShortMethod)] - public void AddToAllInplace(Vector4 diff) - { - this.V0L += diff; - this.V0R += diff; - this.V1L += diff; - this.V1R += diff; - this.V2L += diff; - this.V2R += diff; - this.V3L += diff; - this.V3R += diff; - this.V4L += diff; - this.V4R += diff; - this.V5L += diff; - this.V5R += diff; - this.V6L += diff; - this.V6R += diff; - this.V7L += diff; - this.V7R += diff; + public void AddInPlace(float value) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + var valueVec = Vector256.Create(value); + Unsafe.As>(ref this.V0L) = Avx.Add(Unsafe.As>(ref this.V0L), valueVec); + Unsafe.As>(ref this.V1L) = Avx.Add(Unsafe.As>(ref this.V1L), valueVec); + Unsafe.As>(ref this.V2L) = Avx.Add(Unsafe.As>(ref this.V2L), valueVec); + Unsafe.As>(ref this.V3L) = Avx.Add(Unsafe.As>(ref this.V3L), valueVec); + Unsafe.As>(ref this.V4L) = Avx.Add(Unsafe.As>(ref this.V4L), valueVec); + Unsafe.As>(ref this.V5L) = Avx.Add(Unsafe.As>(ref this.V5L), valueVec); + Unsafe.As>(ref this.V6L) = Avx.Add(Unsafe.As>(ref this.V6L), valueVec); + Unsafe.As>(ref this.V7L) = Avx.Add(Unsafe.As>(ref this.V7L), valueVec); + } + else +#endif + { + var valueVec = new Vector4(value); + this.V0L += valueVec; + this.V0R += valueVec; + this.V1L += valueVec; + this.V1R += valueVec; + this.V2L += valueVec; + this.V2R += valueVec; + this.V3L += valueVec; + this.V3R += valueVec; + this.V4L += valueVec; + this.V4R += valueVec; + this.V5L += valueVec; + this.V5R += valueVec; + this.V6L += valueVec; + this.V6R += valueVec; + this.V7L += valueVec; + this.V7R += valueVec; + } } /// @@ -464,23 +551,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Level shift by +maximum/2, clip to [0..maximum], and round all the values in the block. 
/// - public void NormalizeColorsAndRoundInplace(float maximum) + public void NormalizeColorsAndRoundInPlace(float maximum) { if (SimdUtils.HasVector8) { - this.NormalizeColorsAndRoundInplaceVector8(maximum); + this.NormalizeColorsAndRoundInPlaceVector8(maximum); } else { - this.NormalizeColorsInplace(maximum); - this.RoundInplace(); + this.NormalizeColorsInPlace(maximum); + this.RoundInPlace(); } } /// /// Rounds all values in the block. /// - public void RoundInplace() + public void RoundInPlace() { for (int i = 0; i < Size; i++) { @@ -596,5 +683,157 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components DebugGuard.MustBeLessThan(idx, Size, nameof(idx)); DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx)); } + + /// + /// Transpose the block into the destination block. + /// + /// The destination block + [MethodImpl(InliningOptions.ShortMethod)] + public void TransposeInto(ref Block8x8F d) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + // https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536 + Vector256 r0 = Avx.InsertVector128( + Unsafe.As>(ref this.V0L).ToVector256(), + Unsafe.As>(ref this.V4L), + 1); + + Vector256 r1 = Avx.InsertVector128( + Unsafe.As>(ref this.V1L).ToVector256(), + Unsafe.As>(ref this.V5L), + 1); + + Vector256 r2 = Avx.InsertVector128( + Unsafe.As>(ref this.V2L).ToVector256(), + Unsafe.As>(ref this.V6L), + 1); + + Vector256 r3 = Avx.InsertVector128( + Unsafe.As>(ref this.V3L).ToVector256(), + Unsafe.As>(ref this.V7L), + 1); + + Vector256 r4 = Avx.InsertVector128( + Unsafe.As>(ref this.V0R).ToVector256(), + Unsafe.As>(ref this.V4R), + 1); + + Vector256 r5 = Avx.InsertVector128( + Unsafe.As>(ref this.V1R).ToVector256(), + Unsafe.As>(ref this.V5R), + 1); + + Vector256 r6 = Avx.InsertVector128( + Unsafe.As>(ref this.V2R).ToVector256(), + Unsafe.As>(ref this.V6R), + 1); + + Vector256 r7 = Avx.InsertVector128( + Unsafe.As>(ref this.V3R).ToVector256(), + Unsafe.As>(ref 
this.V7R), + 1); + + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t2 = Avx.UnpackLow(r2, r3); + Vector256 v = Avx.Shuffle(t0, t2, 0x4E); + Unsafe.As>(ref d.V0L) = Avx.Blend(t0, v, 0xCC); + Unsafe.As>(ref d.V1L) = Avx.Blend(t2, v, 0x33); + + Vector256 t4 = Avx.UnpackLow(r4, r5); + Vector256 t6 = Avx.UnpackLow(r6, r7); + v = Avx.Shuffle(t4, t6, 0x4E); + Unsafe.As>(ref d.V4L) = Avx.Blend(t4, v, 0xCC); + Unsafe.As>(ref d.V5L) = Avx.Blend(t6, v, 0x33); + + Vector256 t1 = Avx.UnpackHigh(r0, r1); + Vector256 t3 = Avx.UnpackHigh(r2, r3); + v = Avx.Shuffle(t1, t3, 0x4E); + Unsafe.As>(ref d.V2L) = Avx.Blend(t1, v, 0xCC); + Unsafe.As>(ref d.V3L) = Avx.Blend(t3, v, 0x33); + + Vector256 t5 = Avx.UnpackHigh(r4, r5); + Vector256 t7 = Avx.UnpackHigh(r6, r7); + v = Avx.Shuffle(t5, t7, 0x4E); + Unsafe.As>(ref d.V6L) = Avx.Blend(t5, v, 0xCC); + Unsafe.As>(ref d.V7L) = Avx.Blend(t7, v, 0x33); + } + else +#endif + { + d.V0L.X = this.V0L.X; + d.V1L.X = this.V0L.Y; + d.V2L.X = this.V0L.Z; + d.V3L.X = this.V0L.W; + d.V4L.X = this.V0R.X; + d.V5L.X = this.V0R.Y; + d.V6L.X = this.V0R.Z; + d.V7L.X = this.V0R.W; + + d.V0L.Y = this.V1L.X; + d.V1L.Y = this.V1L.Y; + d.V2L.Y = this.V1L.Z; + d.V3L.Y = this.V1L.W; + d.V4L.Y = this.V1R.X; + d.V5L.Y = this.V1R.Y; + d.V6L.Y = this.V1R.Z; + d.V7L.Y = this.V1R.W; + + d.V0L.Z = this.V2L.X; + d.V1L.Z = this.V2L.Y; + d.V2L.Z = this.V2L.Z; + d.V3L.Z = this.V2L.W; + d.V4L.Z = this.V2R.X; + d.V5L.Z = this.V2R.Y; + d.V6L.Z = this.V2R.Z; + d.V7L.Z = this.V2R.W; + + d.V0L.W = this.V3L.X; + d.V1L.W = this.V3L.Y; + d.V2L.W = this.V3L.Z; + d.V3L.W = this.V3L.W; + d.V4L.W = this.V3R.X; + d.V5L.W = this.V3R.Y; + d.V6L.W = this.V3R.Z; + d.V7L.W = this.V3R.W; + + d.V0R.X = this.V4L.X; + d.V1R.X = this.V4L.Y; + d.V2R.X = this.V4L.Z; + d.V3R.X = this.V4L.W; + d.V4R.X = this.V4R.X; + d.V5R.X = this.V4R.Y; + d.V6R.X = this.V4R.Z; + d.V7R.X = this.V4R.W; + + d.V0R.Y = this.V5L.X; + d.V1R.Y = this.V5L.Y; + d.V2R.Y = this.V5L.Z; + d.V3R.Y = this.V5L.W; + d.V4R.Y = 
this.V5R.X; + d.V5R.Y = this.V5R.Y; + d.V6R.Y = this.V5R.Z; + d.V7R.Y = this.V5R.W; + + d.V0R.Z = this.V6L.X; + d.V1R.Z = this.V6L.Y; + d.V2R.Z = this.V6L.Z; + d.V3R.Z = this.V6L.W; + d.V4R.Z = this.V6R.X; + d.V5R.Z = this.V6R.Y; + d.V6R.Z = this.V6R.Z; + d.V7R.Z = this.V6R.W; + + d.V0R.W = this.V7L.X; + d.V1R.W = this.V7L.Y; + d.V2R.W = this.V7L.Z; + d.V3R.W = this.V7L.W; + d.V4R.W = this.V7R.X; + d.V5R.W = this.V7R.Y; + d.V6R.W = this.V7R.Z; + d.V7R.W = this.V7R.W; + } + } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs new file mode 100644 index 000000000..90ebce3b8 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs @@ -0,0 +1,18 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class Avx2JpegColorConverter : VectorizedJpegColorConverter + { + protected Avx2JpegColorConverter(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision, 8) + { + } + + protected sealed override bool IsAvailable => SimdUtils.HasAvx2; + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs new file mode 100644 index 000000000..ed2e2cd76 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs @@ -0,0 +1,18 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class BasicJpegColorConverter : JpegColorConverter + { + protected BasicJpegColorConverter(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision) + { + } + + protected override bool IsAvailable => true; + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs new file mode 100644 index 000000000..f9334de73 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs @@ -0,0 +1,81 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromCmykAvx2 : Avx2JpegColorConverter + { + public FromCmykAvx2(int precision) + : base(JpegColorSpace.Cmyk, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref 
MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / this.MaximumValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol); + Vector256 c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol); + Vector256 m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol); + Vector256 y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol); + + k = Avx.Multiply(k, scale); + + c = Avx.Multiply(Avx.Multiply(c, k), scale); + m = Avx.Multiply(Avx.Multiply(m, k), scale); + y = Avx.Multiply(Avx.Multiply(y, k), scale); + + Vector256 cmLo = Avx.UnpackLow(c, m); + Vector256 yoLo = Avx.UnpackLow(y, one); + Vector256 cmHi = Avx.UnpackHigh(c, m); + Vector256 yoHi = Avx.UnpackHigh(y, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromCmykBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs similarity index 75% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs 
index 7b257b37d..6cbd52ec3 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs @@ -8,16 +8,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromCmyk : JpegColorConverter + internal sealed class FromCmykBasic : BasicJpegColorConverter { - public FromCmyk(int precision) + public FromCmykBasic(int precision) : base(JpegColorSpace.Cmyk, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) { - // TODO: We can optimize a lot here with Vector and SRCS.Unsafe()! + ConvertCore(values, result, this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { ReadOnlySpan cVals = values.Component0; ReadOnlySpan mVals = values.Component1; ReadOnlySpan yVals = values.Component2; @@ -25,7 +29,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters var v = new Vector4(0, 0, 0, 1F); - var maximum = 1 / this.MaximumValue; + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); for (int i = 0; i < result.Length; i++) @@ -33,7 +37,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters float c = cVals[i]; float m = mVals[i]; float y = yVals[i]; - float k = kVals[i] / this.MaximumValue; + float k = kVals[i] / maxValue; v.X = c * k; v.Y = m * k; @@ -47,4 +51,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs new file mode 100644 index 000000000..e75634b0f 
--- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs @@ -0,0 +1,71 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromCmykVector8 : Vector8JpegColorConverter + { + public FromCmykVector8(int precision) + : base(JpegColorSpace.Cmyk, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair cc = default; + Vector4Pair mm = default; + Vector4Pair yy = default; + ref Vector ccRefAsVector = ref Unsafe.As>(ref cc); + ref Vector mmRefAsVector = ref Unsafe.As>(ref mm); + ref Vector yyRefAsVector = ref Unsafe.As>(ref yy); + + var scale = new Vector(1 / this.MaximumValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector c = Unsafe.Add(ref cBase, i); + Vector m = Unsafe.Add(ref mBase, i); + Vector y = Unsafe.Add(ref yBase, i); + Vector k = Unsafe.Add(ref kBase, i) * scale; + + c = (c * k) * scale; + m = (m * k) * scale; + y = (y * k) * scale; + + ccRefAsVector = c; + mmRefAsVector = m; + yyRefAsVector = y; + + // Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the 
expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref cc, ref mm, ref yy); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromCmykBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs new file mode 100644 index 000000000..45846a6b5 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs @@ -0,0 +1,63 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromGrayscaleAvx2 : Avx2JpegColorConverter + { + public FromGrayscaleAvx2(int precision) + : base(JpegColorSpace.Grayscale, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / this.MaximumValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref 
control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 g = Avx.Multiply(Unsafe.Add(ref gBase, i), scale); + + g = Avx2.PermuteVar8x32(g, vcontrol); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000); + Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Shuffle(g, g, 0b01_01_01_01), one, 0b1000_1000); + Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Shuffle(g, g, 0b10_10_10_10), one, 0b1000_1000); + Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Shuffle(g, g, 0b11_11_11_11), one, 0b1000_1000); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs similarity index 74% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs index cf0bc2c92..0b7a220d9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs @@ -10,16 +10,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromGrayscale : JpegColorConverter + internal sealed class FromGrayscaleBasic : BasicJpegColorConverter { - public FromGrayscale(int precision) + public FromGrayscaleBasic(int precision) : base(JpegColorSpace.Grayscale, precision) { } public override void ConvertToRgba(in ComponentValues values, Span 
result) { - var maximum = 1 / this.MaximumValue; + ConvertCore(values, result, this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); ref float sBase = ref MemoryMarshal.GetReference(values.Component0); @@ -35,4 +40,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs new file mode 100644 index 000000000..8f04c9152 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs @@ -0,0 +1,72 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromRgbAvx2 : Avx2JpegColorConverter + { + public FromRgbAvx2(int precision) + : base(JpegColorSpace.RGB, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref 
MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / this.MaximumValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale); + Vector256 g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale); + Vector256 b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale); + + Vector256 rgLo = Avx.UnpackLow(r, g); + Vector256 boLo = Avx.UnpackLow(b, one); + Vector256 rgHi = Avx.UnpackHigh(r, g); + Vector256 boHi = Avx.UnpackHigh(b, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromRgbBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs similarity index 76% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs index 25889a6df..ddca3fe2f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs +++ 
b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs @@ -8,23 +8,27 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromRgb : JpegColorConverter + internal sealed class FromRgbBasic : BasicJpegColorConverter { - public FromRgb(int precision) + public FromRgbBasic(int precision) : base(JpegColorSpace.RGB, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) { - // TODO: We can optimize a lot here with Vector and SRCS.Unsafe()! + ConvertCore(values, result, this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { ReadOnlySpan rVals = values.Component0; ReadOnlySpan gVals = values.Component1; ReadOnlySpan bVals = values.Component2; var v = new Vector4(0, 0, 0, 1); - var maximum = 1 / this.MaximumValue; + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); for (int i = 0; i < result.Length; i++) @@ -44,4 +48,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs new file mode 100644 index 000000000..763064d1e --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs @@ -0,0 +1,67 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromRgbVector8 : Vector8JpegColorConverter + { + public FromRgbVector8(int precision) + : base(JpegColorSpace.RGB, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / this.MaximumValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector r = Unsafe.Add(ref rBase, i); + Vector g = Unsafe.Add(ref gBase, i); + Vector b = Unsafe.Add(ref bBase, i); + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... 
order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromRgbBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs new file mode 100644 index 000000000..f3a063620 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs @@ -0,0 +1,101 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +// ReSharper disable ImpureMethodCallOnReadonlyValueField +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYCbCrAvx2 : Avx2JpegColorConverter + { + public FromYCbCrAvx2(int precision) + : base(JpegColorSpace.YCbCr, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + #if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-this.HalfValue); + var 
scale = Vector256.Create(1 / this.MaximumValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + // TODO: We should be saving to RGBA not Vector4 + r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); + g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); + b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } +#endif + } + + protected override void ConvertCore(in 
ComponentValues values, Span result) => + FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs index 31fc05461..352e4acb7 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs @@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYCbCrBasic : JpegColorConverter + internal sealed class FromYCbCrBasic : BasicJpegColorConverter { public FromYCbCrBasic(int precision) : base(JpegColorSpace.YCbCr, precision) @@ -48,4 +48,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs similarity index 56% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs index 541a03615..42f8eef5a 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs @@ -5,37 +5,24 @@ using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; - using SixLabors.ImageSharp.Tuples; namespace 
SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYCbCrSimd : JpegColorConverter + internal sealed class FromYCbCrVector4 : VectorizedJpegColorConverter { - public FromYCbCrSimd(int precision) - : base(JpegColorSpace.YCbCr, precision) + public FromYCbCrVector4(int precision) + : base(JpegColorSpace.YCbCr, precision, 8) { } - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); - } + protected override bool IsAvailable => SimdUtils.HasVector4; - FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); - } - - /// - /// SIMD convert using buffers of sizes divisible by 8. - /// - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) { + // TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector is terrible?) 
DebugGuard.IsTrue(result.Length % 8 == 0, nameof(result), "result.Length should be divisible by 8!"); ref Vector4Pair yBase = @@ -48,7 +35,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref Vector4Octet resultBase = ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - var chromaOffset = new Vector4(-halfValue); + var chromaOffset = new Vector4(-this.HalfValue); + var maxValue = this.MaximumValue; // Walking 8 elements at one step: int n = result.Length / 8; @@ -87,31 +75,18 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters tmp.MultiplyInplace(1.772F); b.AddInplace(ref tmp); - if (Vector.Count == 4) - { - // TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector is terrible?) - r.RoundAndDownscalePreVector8(maxValue); - g.RoundAndDownscalePreVector8(maxValue); - b.RoundAndDownscalePreVector8(maxValue); - } - else if (SimdUtils.HasVector8) - { - r.RoundAndDownscaleVector8(maxValue); - g.RoundAndDownscaleVector8(maxValue); - b.RoundAndDownscaleVector8(maxValue); - } - else - { - // TODO: Run fallback scalar code here - // However, no issues expected before someone implements this: https://github.com/dotnet/coreclr/issues/12007 - JpegThrowHelper.ThrowNotImplementedException("Your CPU architecture is too modern!"); - } + r.RoundAndDownscalePreVector8(maxValue); + g.RoundAndDownscalePreVector8(maxValue); + b.RoundAndDownscalePreVector8(maxValue); // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... 
order: ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); destination.Pack(ref r, ref g, ref b); } } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs similarity index 66% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs index c4d1408a2..abacf7161 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs @@ -1,11 +1,10 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. 
using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; - using SixLabors.ImageSharp.Tuples; // ReSharper disable ImpureMethodCallOnReadonlyValueField @@ -13,40 +12,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYCbCrSimdVector8 : JpegColorConverter + internal sealed class FromYCbCrVector8 : Vector8JpegColorConverter { - public FromYCbCrSimdVector8(int precision) + public FromYCbCrVector8(int precision) : base(JpegColorSpace.YCbCr, precision) { } - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); - } - - FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); - } - - /// - /// SIMD convert using buffers of sizes divisible by 8. - /// - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. 
- if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - ref Vector yBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); ref Vector cbBase = @@ -57,7 +31,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref Vector4Octet resultBase = ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - var chromaOffset = new Vector(-halfValue); + var chromaOffset = new Vector(-this.HalfValue); // Walking 8 elements at one step: int n = result.Length / 8; @@ -70,7 +44,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); - var scale = new Vector(1 / maxValue); + var scale = new Vector(1 / this.MaximumValue); for (int i = 0; i < n; i++) { @@ -105,6 +79,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters destination.Pack(ref rr, ref gg, ref bb); } } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs new file mode 100644 index 000000000..ea0132e1e --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs @@ -0,0 +1,110 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYccKAvx2 : Avx2JpegColorConverter + { + public FromYccKAvx2(int precision) + : base(JpegColorSpace.Ycck, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-this.HalfValue); + var scale = Vector256.Create(1 / this.MaximumValue); + var max = Vector256.Create(this.MaximumValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. 
+ var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / 256F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + Vector256 k = Avx.Divide(Unsafe.Add(ref kBase, i), max); + + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + k = Avx2.PermuteVar8x32(k, vcontrol); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = + HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + r = Avx.Subtract(max, Avx.RoundToNearestInteger(r)); + g = Avx.Subtract(max, Avx.RoundToNearestInteger(g)); + b = Avx.Subtract(max, Avx.RoundToNearestInteger(b)); + + r = Avx.Multiply(Avx.Multiply(r, k), scale); + g = Avx.Multiply(Avx.Multiply(g, k), scale); + b = Avx.Multiply(Avx.Multiply(b, k), scale); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => 
+ FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs similarity index 57% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs index 1137cdc0e..778e5325f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs @@ -8,14 +8,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYccK : JpegColorConverter + internal sealed class FromYccKBasic : BasicJpegColorConverter { - public FromYccK(int precision) + public FromYccKBasic(int precision) : base(JpegColorSpace.Ycck, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) + { + ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) { // TODO: We can optimize a lot here with Vector and SRCS.Unsafe()! 
ReadOnlySpan yVals = values.Component0; @@ -25,19 +30,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters var v = new Vector4(0, 0, 0, 1F); - var maximum = 1 / this.MaximumValue; + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); for (int i = 0; i < result.Length; i++) { float y = yVals[i]; - float cb = cbVals[i] - this.HalfValue; - float cr = crVals[i] - this.HalfValue; - float k = kVals[i] / this.MaximumValue; + float cb = cbVals[i] - halfValue; + float cr = crVals[i] - halfValue; + float k = kVals[i] / maxValue; - v.X = (this.MaximumValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; - v.Y = (this.MaximumValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k; - v.Z = (this.MaximumValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; + v.X = (maxValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; + v.Y = (maxValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k; + v.Z = (maxValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; v.W = 1F; v *= scale; @@ -47,4 +52,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs new file mode 100644 index 000000000..c360392de --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs @@ -0,0 +1,91 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYccKVector8 : Vector8JpegColorConverter + { + public FromYccKVector8(int precision) + : base(JpegColorSpace.Ycck, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + var chromaOffset = new Vector(-this.HalfValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / this.MaximumValue); + var max = new Vector(this.MaximumValue); + + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / MaximumValue; + Vector y = Unsafe.Add(ref yBase, i); + Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; + Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; + Vector k = Unsafe.Add(ref kBase, i) / max; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector r = y + (cr * new 
Vector(1.402F)); + Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); + Vector b = y + (cb * new Vector(1.772F)); + + r = (max - r.FastRound()) * k; + g = (max - g.FastRound()) * k; + b = (max - b.FastRound()) * k; + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values in the expected (r0,g0,b0,1), (r1,g1,b1,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs new file mode 100644 index 000000000..3e9b889db --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs @@ -0,0 +1,18 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class Vector8JpegColorConverter : VectorizedJpegColorConverter + { + protected Vector8JpegColorConverter(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision, 8) + { + } + + protected sealed override bool IsAvailable => SimdUtils.HasVector8; + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs new file mode 100644 index 000000000..522be82c2 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs @@ -0,0 +1,46 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class VectorizedJpegColorConverter : JpegColorConverter + { + private readonly int vectorSize; + + protected VectorizedJpegColorConverter(JpegColorSpace colorSpace, int precision, int vectorSize) + : base(colorSpace, precision) + { + this.vectorSize = vectorSize; + } + + public sealed override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % this.vectorSize; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + // The vectorized path is only valid when the derived converter reports IsAvailable. + // E.g. an AVX register is capable of storing 8 float-s. 
+ if (!this.IsAvailable) + { + throw new InvalidOperationException( + "This converter can be used only on architecture having 256 bit floating point SIMD registers!"); + } + + this.ConvertCoreVectorized(values.Slice(0, simdCount), result.Slice(0, simdCount)); + } + + this.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder)); + } + + protected abstract void ConvertCoreVectorized(in ComponentValues values, Span result); + + protected abstract void ConvertCore(in ComponentValues values, Span result); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index f68bca041..2d24f01dd 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -3,8 +3,8 @@ using System; using System.Collections.Generic; +using System.Linq; using System.Numerics; - using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Tuples; @@ -18,22 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters /// /// The available converters /// - private static readonly JpegColorConverter[] Converters = - { - // 8-bit converters - GetYCbCrConverter(8), - new FromYccK(8), - new FromCmyk(8), - new FromGrayscale(8), - new FromRgb(8), - - // 12-bit converters - GetYCbCrConverter(12), - new FromYccK(12), - new FromCmyk(12), - new FromGrayscale(12), - new FromRgb(12), - }; + private static readonly JpegColorConverter[] Converters = CreateConverters(); /// /// Initializes a new instance of the class. @@ -46,6 +31,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters this.HalfValue = MathF.Ceiling(this.MaximumValue / 2); } + /// + /// Gets a value indicating whether this is available + /// on the current runtime and CPU architecture. 
+ /// + protected abstract bool IsAvailable { get; } + /// /// Gets the of this converter. /// @@ -71,8 +62,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters /// public static JpegColorConverter GetConverter(JpegColorSpace colorSpace, int precision) { - JpegColorConverter converter = Array.Find(Converters, c => c.ColorSpace == colorSpace - && c.Precision == precision); + JpegColorConverter converter = Array.Find( + Converters, + c => c.ColorSpace == colorSpace + && c.Precision == precision); if (converter is null) { @@ -90,10 +83,88 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters public abstract void ConvertToRgba(in ComponentValues values, Span result); /// - /// Returns the for the YCbCr colorspace that matches the current CPU architecture. + /// Returns the s for all supported colorspaces and precisions. + /// + private static JpegColorConverter[] CreateConverters() + { + var converters = new List(); + + // 8-bit converters + converters.AddRange(GetYCbCrConverters(8)); + converters.AddRange(GetYccKConverters(8)); + converters.AddRange(GetCmykConverters(8)); + converters.AddRange(GetGrayScaleConverters(8)); + converters.AddRange(GetRgbConverters(8)); + + // 12-bit converters + converters.AddRange(GetYCbCrConverters(12)); + converters.AddRange(GetYccKConverters(12)); + converters.AddRange(GetCmykConverters(12)); + converters.AddRange(GetGrayScaleConverters(12)); + converters.AddRange(GetRgbConverters(12)); + + return converters.Where(x => x.IsAvailable).ToArray(); + } + + /// + /// Returns the s for the YCbCr colorspace. + /// + private static IEnumerable GetYCbCrConverters(int precision) + { +#if SUPPORTS_RUNTIME_INTRINSICS + yield return new FromYCbCrAvx2(precision); +#endif + yield return new FromYCbCrVector8(precision); + yield return new FromYCbCrVector4(precision); + yield return new FromYCbCrBasic(precision); + } + + /// + /// Returns the s for the YccK colorspace. 
+ /// + private static IEnumerable GetYccKConverters(int precision) + { +#if SUPPORTS_RUNTIME_INTRINSICS + yield return new FromYccKAvx2(precision); +#endif + yield return new FromYccKVector8(precision); + yield return new FromYccKBasic(precision); + } + + /// + /// Returns the s for the CMYK colorspace. + /// + private static IEnumerable GetCmykConverters(int precision) + { +#if SUPPORTS_RUNTIME_INTRINSICS + yield return new FromCmykAvx2(precision); +#endif + yield return new FromCmykVector8(precision); + yield return new FromCmykBasic(precision); + } + + /// + /// Returns the s for the gray scale colorspace. + /// + private static IEnumerable GetGrayScaleConverters(int precision) + { +#if SUPPORTS_RUNTIME_INTRINSICS + yield return new FromGrayscaleAvx2(precision); +#endif + yield return new FromGrayscaleBasic(precision); + } + + /// + /// Returns the s for the RGB colorspace. /// - private static JpegColorConverter GetYCbCrConverter(int precision) => - FromYCbCrSimdVector8.IsAvailable ? (JpegColorConverter)new FromYCbCrSimdVector8(precision) : new FromYCbCrSimd(precision); + private static IEnumerable GetRgbConverters(int precision) + { +#if SUPPORTS_RUNTIME_INTRINSICS + yield return new FromRgbAvx2(precision); +#endif + yield return new FromRgbVector8(precision); + yield return new FromRgbBasic(precision); + } /// /// A stack-only struct to reference the input buffers using -s. @@ -230,6 +301,52 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters this.V7.Z = b.B.W; this.V7.W = 1f; } + + /// + /// Pack (g0,g1...g7) vector values as (g0,g0,g0,1), (g1,g1,g1,1) ... 
+ /// + public void Pack(ref Vector4Pair g) + { + this.V0.X = g.A.X; + this.V0.Y = g.A.X; + this.V0.Z = g.A.X; + this.V0.W = 1f; + + this.V1.X = g.A.Y; + this.V1.Y = g.A.Y; + this.V1.Z = g.A.Y; + this.V1.W = 1f; + + this.V2.X = g.A.Z; + this.V2.Y = g.A.Z; + this.V2.Z = g.A.Z; + this.V2.W = 1f; + + this.V3.X = g.A.W; + this.V3.Y = g.A.W; + this.V3.Z = g.A.W; + this.V3.W = 1f; + + this.V4.X = g.B.X; + this.V4.Y = g.B.X; + this.V4.Z = g.B.X; + this.V4.W = 1f; + + this.V5.X = g.B.Y; + this.V5.Y = g.B.Y; + this.V5.Z = g.B.Y; + this.V5.W = 1f; + + this.V6.X = g.B.Z; + this.V6.Y = g.B.Z; + this.V6.Z = g.B.Z; + this.V6.W = 1f; + + this.V7.X = g.B.W; + this.V7.Y = g.B.W; + this.V7.Z = g.B.W; + this.V7.W = 1f; + } } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs index 40683e25a..e0311dafe 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs @@ -81,14 +81,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder b.LoadFrom(ref sourceBlock); // Dequantize: - b.MultiplyInplace(ref this.DequantiazationTable); + b.MultiplyInPlace(ref this.DequantiazationTable); FastFloatingPointDCT.TransformIDCT(ref b, ref this.WorkspaceBlock1, ref this.WorkspaceBlock2); // To conform better to libjpeg we actually NEED TO loose precision here. // This is because they store blocks as Int16 between all the operations. // To be "more accurate", we need to emulate this by rounding! 
- this.WorkspaceBlock1.NormalizeColorsAndRoundInplace(maximumValue); + this.WorkspaceBlock1.NormalizeColorsAndRoundInPlace(maximumValue); this.WorkspaceBlock1.ScaledCopyTo( ref destAreaOrigin, diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index ee06f2bde..a6d0622dd 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System.Numerics; @@ -50,8 +50,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// Temporary block provided by the caller public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp) { - // TODO: Transpose is a bottleneck now. We need full AVX support to optimize it: - // https://github.com/dotnet/corefx/issues/22940 src.TransposeInto(ref temp); IDCT8x4_LeftPart(ref temp, ref dest); @@ -63,7 +61,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components IDCT8x4_RightPart(ref temp, ref dest); // TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing? 
- dest.MultiplyInplace(C_0_125); + dest.MultiplyInPlace(C_0_125); } /// @@ -326,7 +324,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components src.TransposeInto(ref temp); if (offsetSourceByNeg128) { - temp.AddToAllInplace(new Vector4(-128)); + temp.AddInPlace(-128F); } FDCT8x4_LeftPart(ref temp, ref dest); @@ -337,7 +335,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components FDCT8x4_LeftPart(ref temp, ref dest); FDCT8x4_RightPart(ref temp, ref dest); - dest.MultiplyInplace(C_0_125); + dest.MultiplyInPlace(C_0_125); } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs b/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs index b0311f088..9342e09df 100644 --- a/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs +++ b/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs @@ -35,6 +35,15 @@ namespace SixLabors.ImageSharp.Formats.Png options.ColorType ??= pngMetadata.ColorType ?? SuggestColorType(); options.BitDepth ??= pngMetadata.BitDepth ?? SuggestBitDepth(); + // Ensure bit depth and color type are a supported combination. + // Bit8 is the only bit depth supported by all color types. + byte bits = (byte)options.BitDepth; + byte[] validBitDepths = PngConstants.ColorTypes[options.ColorType.Value]; + if (Array.IndexOf(validBitDepths, bits) == -1) + { + options.BitDepth = PngBitDepth.Bit8; + } + options.InterlaceMethod ??= pngMetadata.InterlaceMethod; use16Bit = options.BitDepth == PngBitDepth.Bit16; @@ -44,12 +53,6 @@ namespace SixLabors.ImageSharp.Formats.Png { options.ChunkFilter = PngChunkFilter.ExcludeAll; } - - // Ensure we are not allowing impossible combinations. 
- if (!PngConstants.ColorTypes.ContainsKey(options.ColorType.Value)) - { - throw new NotSupportedException("Color type is not supported or not valid."); - } } /// @@ -68,15 +71,10 @@ namespace SixLabors.ImageSharp.Formats.Png return null; } - byte bits = (byte)options.BitDepth; - if (Array.IndexOf(PngConstants.ColorTypes[options.ColorType.Value], bits) == -1) - { - throw new NotSupportedException("Bit depth is not supported or not valid."); - } - // Use the metadata to determine what quantization depth to use if no quantizer has been set. if (options.Quantizer is null) { + byte bits = (byte)options.BitDepth; var maxColors = ImageMaths.GetColorCountForBitDepth(bits); options.Quantizer = new WuQuantizer(new QuantizerOptions { MaxColors = maxColors }); } diff --git a/src/ImageSharp/Image{TPixel}.cs b/src/ImageSharp/Image{TPixel}.cs index 255193c8e..83ecc3753 100644 --- a/src/ImageSharp/Image{TPixel}.cs +++ b/src/ImageSharp/Image{TPixel}.cs @@ -201,14 +201,14 @@ namespace SixLabors.ImageSharp public bool TryGetSinglePixelSpan(out Span span) { IMemoryGroup mg = this.GetPixelMemoryGroup(); - if (mg.Count > 1) + if (mg.Count == 1) { - span = default; - return false; + span = mg[0].Span; + return true; } - span = mg.Single().Span; - return true; + span = default; + return false; } /// diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs index 0b1292b64..d30616997 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs @@ -53,84 +53,112 @@ namespace SixLabors.ImageSharp.PixelFormats Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale)); } /// - public override void ToRgba32(Configuration 
configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgba32(source, dest); } /// - public override void FromRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToArgb32(source, dest); } /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { 
Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgra32(source, dest); } /// - public override void FromBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToArgb32(source, dest); + } + /// + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgb24(source, 
dest); } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Argb32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToArgb32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Argb32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgr24(source, dest); + } - dp.FromArgb32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToArgb32(source, dest); } /// @@ -205,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - 
Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Argb32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Argb32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromArgb32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs index b73bb8b83..50d4942ec 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs @@ -52,146 +52,182 @@ namespace SixLabors.ImageSharp.PixelFormats { Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply)); } - /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgba32(source, dest); + } - for 
(int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Argb32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgr24(source, dest); } - /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToArgb32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgra32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgr24(source, dest); } - /// - public override void ToL8(Configuration 
configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToBgra32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref L8 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToBgr24(source, dest); } - /// - public override void ToL16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgb24(source, dest); + } - for (int i = 0; i < 
sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref L16 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgr24(source, dest); } /// - public override void ToLa16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToL8(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref La16 dp = ref Unsafe.Add(ref destRef, i); + ref L8 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToLa32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToL16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref La32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 
0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref La32 dp = ref Unsafe.Add(ref destRef, i); + ref L16 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToLa16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); + ref La16 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToLa32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgba32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref La32 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgba32 dp = ref Unsafe.Add(ref destRef, i); + ref La32 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs 
b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs index 5bdd10404..b38e5f19d 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs @@ -53,84 +53,112 @@ namespace SixLabors.ImageSharp.PixelFormats Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale)); } /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgba32(source, dest); } /// - public override void FromRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); 
- - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgra32(source, dest); } /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToArgb32(source, dest); } /// - public override void FromArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgra32(source, dest); + } + /// + public override void ToRgb24( 
+ Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgb24(source, dest); } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgra32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgra32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgra32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToBgr24(source, dest); + } - dp.FromBgra32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan 
sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToBgra32(source, dest); } /// @@ -205,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Bgra32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgra32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromBgra32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs index 332683fc7..9a4173892 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs @@ -52,59 +52,114 @@ namespace SixLabors.ImageSharp.PixelFormats { Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply)); } - /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void 
ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToRgba32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Argb32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromRgb24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToRgb24(source, dest); } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToArgb32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp 
= ref Unsafe.Add(ref destRef, i); + /// + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromRgb24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgb24(source, dest); + } + /// + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgra32(source, dest); } /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgb24(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - 
ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgra32 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgr24(source, dest); + } - dp.FromRgb24(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgb24(source, dest); } /// @@ -179,24 +234,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgba32 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgba32 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromRgb24(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs index b05c62f1f..5b60ec10e 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs @@ -42,84 +42,112 @@ 
namespace SixLabors.ImageSharp.PixelFormats } /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToArgb32(source, dest); } /// - public override void FromArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgba32(source, dest); } /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToBgra32( + 
Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgra32(source, dest); } /// - public override void FromBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgba32(source, dest); + } + /// + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = 
MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToRgb24(source, dest); } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgba32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToRgba32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgba32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgr24(source, dest); + } - dp.FromRgba32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgba32(source, dest); } /// @@ -194,24 +222,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration 
configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Rgba32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgba32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromRgba32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude index 5d56731ba..b728b0115 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude @@ -17,7 +17,7 @@ using System.Runtime.InteropServices; <#+ static readonly string[] CommonPixelTypes = { "Argb32", "Bgr24", "Bgra32", "L8", "L16", "La16", "La32", "Rgb24", "Rgba32", "Rgb48", "Rgba64", "Bgra5551" }; - static readonly string[] Optimized32BitTypes = { "Rgba32", "Argb32", "Bgra32" }; + static readonly string[] OptimizedPixelTypes = { "Rgba32", "Argb32", "Bgra32", "Rgb24", "Bgr24" }; // Types with Rgba32-combatible to/from Vector4 conversion static readonly string[] Rgba32CompatibleTypes = { "Argb32", "Bgra32", "Rgb24", "Bgr24" }; @@ -88,35 +88,31 @@ using System.Runtime.InteropServices; { #> /// - public override void To<#=otherPixelType#>(Configuration configuration, ReadOnlySpan<<#=thisPixelType#>> sourcePixels, Span<<#=otherPixelType#>> destinationPixels) + public override void To<#=otherPixelType#>( + Configuration configuration, + ReadOnlySpan<<#=thisPixelType#>> sourcePixels, + Span<<#=otherPixelType#>> destinationPixels) 
{ Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As<<#=thisPixelType#>,uint>(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As<<#=otherPixelType#>, uint>(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.From<#=thisPixelType#>.To<#=otherPixelType#>(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast<<#=thisPixelType#>, byte>(sourcePixels); + Span dest = MemoryMarshal.Cast<<#=otherPixelType#>, byte>(destinationPixels); + PixelConverter.From<#=thisPixelType#>.To<#=otherPixelType#>(source, dest); } /// - public override void From<#=otherPixelType#>(Configuration configuration, ReadOnlySpan<<#=otherPixelType#>> sourcePixels, Span<<#=thisPixelType#>> destinationPixels) + public override void From<#=otherPixelType#>( + Configuration configuration, + ReadOnlySpan<<#=otherPixelType#>> sourcePixels, + Span<<#=thisPixelType#>> destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As<<#=otherPixelType#>,uint>(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As<<#=thisPixelType#>, uint>(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.From<#=otherPixelType#>.To<#=thisPixelType#>(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast<<#=otherPixelType#>, byte>(sourcePixels); + Span dest = MemoryMarshal.Cast<<#=thisPixelType#>, byte>(destinationPixels); + PixelConverter.From<#=otherPixelType#>.To<#=thisPixelType#>(source, dest); } <#+ } @@ -152,8 +148,8 
@@ using System.Runtime.InteropServices; GenerateRgba32CompatibleVector4ConversionMethods(pixelType, pixelType.EndsWith("32")); } - var matching32BitTypes = Optimized32BitTypes.Contains(pixelType) ? - Optimized32BitTypes.Where(p => p != pixelType) : + var matching32BitTypes = OptimizedPixelTypes.Contains(pixelType) ? + OptimizedPixelTypes.Where(p => p != pixelType) : Enumerable.Empty(); foreach (string destPixelType in matching32BitTypes) diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index 814264084..7215fa860 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -1,7 +1,7 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. -using System.Buffers.Binary; +using System; using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.PixelFormats.Utils @@ -21,88 +21,196 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils public static class FromRgba32 { /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToArgb32(uint packedRgba) - { - // packedRgba = [aa bb gg rr] - // ROTL(8, packedRgba) = [bb gg rr aa] - return (packedRgba << 8) | (packedRgba >> 24); - } + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4(source, dest, default); /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. 
/// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToBgra32(uint packedRgba) - { - // packedRgba = [aa bb gg rr] - // tmp1 = [aa 00 gg 00] - // tmp2 = [00 bb 00 rr] - // tmp3=ROTL(16, tmp2) = [00 rr 00 bb] - // tmp1 + tmp3 = [aa rr gg bb] - uint tmp1 = packedRgba & 0xFF00FF00; - uint tmp2 = packedRgba & 0x00FF00FF; - uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); - return tmp1 + tmp3; - } + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); } public static class FromArgb32 { /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. 
/// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToRgba32(uint packedArgb) - { - // packedArgb = [bb gg rr aa] - // ROTR(8, packedArgb) = [aa bb gg rr] - return (packedArgb >> 8) | (packedArgb << 24); - } + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 3, 2, 1)); /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToBgra32(uint packedArgb) - { - // packedArgb = [bb gg rr aa] - // REVERSE(packedArgb) = [aa rr gg bb] - return BinaryPrimitives.ReverseEndianness(packedArgb); - } + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 1, 2, 3)); } public static class FromBgra32 { /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, default); + } + + public static class FromRgb24 + { + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); + } + + public static class FromBgr24 + { + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToArgb32(uint packedBgra) - { - // packedBgra = [aa rr gg bb] - // REVERSE(packedBgra) = [bb gg rr aa] - return BinaryPrimitives.ReverseEndianness(packedBgra); - } + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, default); /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToRgba32(uint packedBgra) - { - // packedRgba = [aa rr gg bb] - // tmp1 = [aa 00 gg 00] - // tmp2 = [00 rr 00 bb] - // tmp3=ROTL(16, tmp2) = [00 bb 00 rr] - // tmp1 + tmp3 = [aa bb gg rr] - uint tmp1 = packedBgra & 0xFF00FF00; - uint tmp2 = packedBgra & 0x00FF00FF; - uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); - return tmp1 + tmp3; - } + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Processing/Processors/Binarization/AdaptiveThresholdProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Binarization/AdaptiveThresholdProcessor{TPixel}.cs index 43023c938..6d95d51b3 100644 --- a/src/ImageSharp/Processing/Processors/Binarization/AdaptiveThresholdProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Binarization/AdaptiveThresholdProcessor{TPixel}.cs @@ -67,7 +67,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Binarization ref TPixel color = ref Unsafe.Add(ref rowRef, x); color.ToRgba32(ref rgb); - sum += (ulong)(rgb.R + rgb.G + rgb.G); + sum += (ulong)(rgb.R + 
rgb.G + rgb.B); + if (x - startX != 0) { intImage[x - startX, y - startY] = intImage[x - startX - 1, y - startY] + sum; diff --git a/src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs index b5b07d7a8..14687426d 100644 --- a/src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs @@ -86,42 +86,39 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization new Rectangle(0, 0, sourceWidth, tileYStartPositions.Count), in operation); - ref TPixel pixelsBase = ref source.GetPixelReference(0, 0); - // Fix left column - ProcessBorderColumn(ref pixelsBase, cdfData, 0, sourceWidth, sourceHeight, this.Tiles, tileHeight, xStart: 0, xEnd: halfTileWidth, luminanceLevels); + ProcessBorderColumn(source, cdfData, 0, sourceHeight, this.Tiles, tileHeight, xStart: 0, xEnd: halfTileWidth, luminanceLevels); // Fix right column int rightBorderStartX = ((this.Tiles - 1) * tileWidth) + halfTileWidth; - ProcessBorderColumn(ref pixelsBase, cdfData, this.Tiles - 1, sourceWidth, sourceHeight, this.Tiles, tileHeight, xStart: rightBorderStartX, xEnd: sourceWidth, luminanceLevels); + ProcessBorderColumn(source, cdfData, this.Tiles - 1, sourceHeight, this.Tiles, tileHeight, xStart: rightBorderStartX, xEnd: sourceWidth, luminanceLevels); // Fix top row - ProcessBorderRow(ref pixelsBase, cdfData, 0, sourceWidth, this.Tiles, tileWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels); + ProcessBorderRow(source, cdfData, 0, sourceWidth, this.Tiles, tileWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels); // Fix bottom row int bottomBorderStartY = ((this.Tiles - 1) * tileHeight) + halfTileHeight; - ProcessBorderRow(ref pixelsBase, cdfData, this.Tiles - 1, sourceWidth, this.Tiles, 
tileWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels); + ProcessBorderRow(source, cdfData, this.Tiles - 1, sourceWidth, this.Tiles, tileWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels); // Left top corner - ProcessCornerTile(ref pixelsBase, cdfData, sourceWidth, 0, 0, xStart: 0, xEnd: halfTileWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels); + ProcessCornerTile(source, cdfData, 0, 0, xStart: 0, xEnd: halfTileWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels); // Left bottom corner - ProcessCornerTile(ref pixelsBase, cdfData, sourceWidth, 0, this.Tiles - 1, xStart: 0, xEnd: halfTileWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels); + ProcessCornerTile(source, cdfData, 0, this.Tiles - 1, xStart: 0, xEnd: halfTileWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels); // Right top corner - ProcessCornerTile(ref pixelsBase, cdfData, sourceWidth, this.Tiles - 1, 0, xStart: rightBorderStartX, xEnd: sourceWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels); + ProcessCornerTile(source, cdfData, this.Tiles - 1, 0, xStart: rightBorderStartX, xEnd: sourceWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels); // Right bottom corner - ProcessCornerTile(ref pixelsBase, cdfData, sourceWidth, this.Tiles - 1, this.Tiles - 1, xStart: rightBorderStartX, xEnd: sourceWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels); + ProcessCornerTile(source, cdfData, this.Tiles - 1, this.Tiles - 1, xStart: rightBorderStartX, xEnd: sourceWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels); } } /// /// Processes the part of a corner tile which was previously left out. It consists of 1 / 4 of a tile and does not need interpolation. /// - /// The output pixels base reference. + /// The source image. /// The lookup table to remap the grey values. - /// The source image width. /// The x-position in the CDF lookup map. /// The y-position in the CDF lookup map. 
/// X start position. @@ -133,9 +130,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization /// or 65536 for 16-bit grayscale images. /// private static void ProcessCornerTile( - ref TPixel pixelsBase, + ImageFrame source, CdfTileData cdfData, - int sourceWidth, int cdfX, int cdfY, int xStart, @@ -146,10 +142,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization { for (int dy = yStart; dy < yEnd; dy++) { - int dyOffSet = dy * sourceWidth; + Span rowSpan = source.GetPixelRowSpan(dy); for (int dx = xStart; dx < xEnd; dx++) { - ref TPixel pixel = ref Unsafe.Add(ref pixelsBase, dyOffSet + dx); + ref TPixel pixel = ref rowSpan[dx]; float luminanceEqualized = cdfData.RemapGreyValue(cdfX, cdfY, GetLuminance(pixel, luminanceLevels)); pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); } @@ -159,10 +155,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization /// /// Processes a border column of the image which is half the size of the tile width. /// - /// The output pixels reference. + /// The source image. /// The pre-computed lookup tables to remap the grey values for each tiles. /// The X index of the lookup table to use. - /// The source image width. /// The source image height. /// The number of vertical tiles. /// The height of a tile. @@ -173,10 +168,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization /// or 65536 for 16-bit grayscale images. 
/// private static void ProcessBorderColumn( - ref TPixel pixelBase, + ImageFrame source, CdfTileData cdfData, int cdfX, - int sourceWidth, int sourceHeight, int tileCount, int tileHeight, @@ -194,10 +188,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization int tileY = 0; for (int dy = y; dy < yLimit; dy++) { - int dyOffSet = dy * sourceWidth; + Span rowSpan = source.GetPixelRowSpan(dy); for (int dx = xStart; dx < xEnd; dx++) { - ref TPixel pixel = ref Unsafe.Add(ref pixelBase, dyOffSet + dx); + ref TPixel pixel = ref rowSpan[dx]; float luminanceEqualized = InterpolateBetweenTwoTiles(pixel, cdfData, cdfX, cdfY, cdfX, cdfY + 1, tileY, tileHeight, luminanceLevels); pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); } @@ -213,7 +207,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization /// /// Processes a border row of the image which is half of the size of the tile height. /// - /// The output pixels base reference. + /// The source image. /// The pre-computed lookup tables to remap the grey values for each tiles. /// The Y index of the lookup table to use. /// The source image width. @@ -226,7 +220,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization /// or 65536 for 16-bit grayscale images. 
/// private static void ProcessBorderRow( - ref TPixel pixelBase, + ImageFrame source, CdfTileData cdfData, int cdfY, int sourceWidth, @@ -244,12 +238,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization { for (int dy = yStart; dy < yEnd; dy++) { - int dyOffSet = dy * sourceWidth; + Span rowSpan = source.GetPixelRowSpan(dy); int tileX = 0; int xLimit = Math.Min(x + tileWidth, sourceWidth - 1); for (int dx = x; dx < xLimit; dx++) { - ref TPixel pixel = ref Unsafe.Add(ref pixelBase, dyOffSet + dx); + ref TPixel pixel = ref rowSpan[dx]; float luminanceEqualized = InterpolateBetweenTwoTiles(pixel, cdfData, cdfX, cdfY, cdfX + 1, cdfY, tileX, tileWidth, luminanceLevels); pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); tileX++; @@ -410,8 +404,6 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(in RowInterval rows) { - ref TPixel sourceBase = ref this.source.GetPixelReference(0, 0); - for (int index = rows.Min; index < rows.Max; index++) { (int y, int cdfY) tileYStartPosition = this.tileYStartPositions[index]; @@ -427,11 +419,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization int xEnd = Math.Min(x + this.tileWidth, this.sourceWidth); for (int dy = y; dy < yEnd; dy++) { - int dyOffSet = dy * this.sourceWidth; + Span rowSpan = this.source.GetPixelRowSpan(dy); int tileX = 0; for (int dx = x; dx < xEnd; dx++) { - ref TPixel pixel = ref Unsafe.Add(ref sourceBase, dyOffSet + dx); + ref TPixel pixel = ref rowSpan[dx]; float luminanceEqualized = InterpolateBetweenFourTiles( pixel, this.cdfData, @@ -597,15 +589,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(in RowInterval rows) { - ref TPixel sourceBase = ref this.source.GetPixelReference(0, 0); - for (int index = rows.Min; index < rows.Max; index++) { int cdfX 
= 0; int cdfY = this.tileYStartPositions[index].cdfY; int y = this.tileYStartPositions[index].y; int endY = Math.Min(y + this.tileHeight, this.sourceHeight); - ref int cdfMinBase = ref MemoryMarshal.GetReference(this.cdfMinBuffer2D.GetRowSpan(cdfY)); + Span cdfMinSpan = this.cdfMinBuffer2D.GetRowSpan(cdfY); using IMemoryOwner histogramBuffer = this.allocator.Allocate(this.luminanceLevels); Span histogram = histogramBuffer.GetSpan(); @@ -620,10 +610,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization int xlimit = Math.Min(x + this.tileWidth, this.sourceWidth); for (int dy = y; dy < endY; dy++) { - int dyOffset = dy * this.sourceWidth; + Span rowSpan = this.source.GetPixelRowSpan(dy); for (int dx = x; dx < xlimit; dx++) { - int luminance = GetLuminance(Unsafe.Add(ref sourceBase, dyOffset + dx), this.luminanceLevels); + int luminance = GetLuminance(rowSpan[dx], this.luminanceLevels); histogram[luminance]++; } } @@ -633,7 +623,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization this.processor.ClipHistogram(histogram, this.processor.ClipLimit); } - Unsafe.Add(ref cdfMinBase, cdfX) = this.processor.CalculateCdf(ref cdfBase, ref histogramBase, histogram.Length - 1); + cdfMinSpan[cdfX] += this.processor.CalculateCdf(ref cdfBase, ref histogramBase, histogram.Length - 1); cdfX++; } diff --git a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs index 19514c4b6..74d293566 100644 --- a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs @@ -6,6 +6,7 @@ using System.Buffers; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Threading; using SixLabors.ImageSharp.Advanced; using 
SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -51,7 +52,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization using IMemoryOwner histogramBuffer = memoryAllocator.Allocate(this.LuminanceLevels, AllocationOptions.Clean); - // Build the histogram of the grayscale levels + // Build the histogram of the grayscale levels. var grayscaleOperation = new GrayscaleLevelsRowOperation(interest, histogramBuffer, source, this.LuminanceLevels); ParallelRowIterator.IterateRows( this.Configuration, @@ -106,16 +107,24 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization } /// +#if NETSTANDARD2_0 + // https://github.com/SixLabors/ImageSharp/issues/1204 + [MethodImpl(MethodImplOptions.NoOptimization)] +#else [MethodImpl(InliningOptions.ShortMethod)] +#endif public void Invoke(int y) { ref int histogramBase = ref MemoryMarshal.GetReference(this.histogramBuffer.GetSpan()); - ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y)); + Span pixelRow = this.source.GetPixelRowSpan(y); + int levels = this.luminanceLevels; for (int x = 0; x < this.bounds.Width; x++) { - int luminance = GetLuminance(Unsafe.Add(ref pixelBase, x), this.luminanceLevels); - Unsafe.Add(ref histogramBase, luminance)++; + // TODO: We should bulk convert here. 
+ var vector = pixelRow[x].ToVector4(); + int luminance = ImageMaths.GetBT709Luminance(ref vector, levels); + Interlocked.Increment(ref Unsafe.Add(ref histogramBase, luminance)); } } } @@ -147,18 +156,27 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization } /// +#if NETSTANDARD2_0 + // https://github.com/SixLabors/ImageSharp/issues/1204 + [MethodImpl(MethodImplOptions.NoOptimization)] +#else [MethodImpl(InliningOptions.ShortMethod)] +#endif public void Invoke(int y) { ref int cdfBase = ref MemoryMarshal.GetReference(this.cdfBuffer.GetSpan()); - ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y)); + Span pixelRow = this.source.GetPixelRowSpan(y); + int levels = this.luminanceLevels; + float noOfPixelsMinusCdfMin = this.numberOfPixelsMinusCdfMin; for (int x = 0; x < this.bounds.Width; x++) { - ref TPixel pixel = ref Unsafe.Add(ref pixelBase, x); - int luminance = GetLuminance(pixel, this.luminanceLevels); - float luminanceEqualized = Unsafe.Add(ref cdfBase, luminance) / this.numberOfPixelsMinusCdfMin; - pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); + // TODO: We should bulk convert here. 
+ ref TPixel pixel = ref pixelRow[x]; + var vector = pixel.ToVector4(); + int luminance = ImageMaths.GetBT709Luminance(ref vector, levels); + float luminanceEqualized = Unsafe.Add(ref cdfBase, luminance) / noOfPixelsMinusCdfMin; + pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, vector.W)); } } } diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index e9e93a855..76759948b 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -26,18 +26,21 @@ - + - - - - - + + + + + + - + + + - + diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs new file mode 100644 index 000000000..61fb2745b --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs @@ -0,0 +1,21 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_AddInPlace + { + [Benchmark] + public float AddInplace() + { + float f = 42F; + Block8x8F b = default; + b.AddInPlace(f); + return f; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs new file mode 100644 index 000000000..0d1e67112 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs @@ -0,0 +1,37 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_MultiplyInPlaceBlock + { + private static readonly Block8x8F Source = Create8x8FloatData(); + + [Benchmark] + public void MultiplyInPlaceBlock() + { + Block8x8F dest = default; + Source.MultiplyInPlace(ref dest); + } + + private static Block8x8F Create8x8FloatData() + { + var result = new float[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[(i * 8) + j] = (i * 10) + j; + } + } + + var source = default(Block8x8F); + source.LoadFrom(result); + return source; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs new file mode 100644 index 000000000..31a6ca713 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs @@ -0,0 +1,21 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_MultiplyInPlaceScalar + { + [Benchmark] + public float MultiplyInPlaceScalar() + { + float f = 42F; + Block8x8F b = default; + b.MultiplyInPlace(f); + return f; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs new file mode 100644 index 000000000..1d103cd1a --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs @@ -0,0 +1,37 @@ +// Copyright (c) Six Labors. 
+// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_Transpose + { + private static readonly Block8x8F Source = Create8x8FloatData(); + + [Benchmark] + public void TransposeInto() + { + var dest = default(Block8x8F); + Source.TransposeInto(ref dest); + } + + private static Block8x8F Create8x8FloatData() + { + var result = new float[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[(i * 8) + j] = (i * 10) + j; + } + } + + var source = default(Block8x8F); + source.LoadFrom(result); + return source; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs new file mode 100644 index 000000000..1e6b9fe92 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs @@ -0,0 +1,41 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.ShortClr))] + public class CmykColorConversion : ColorConversionBenchmark + { + public CmykColorConversion() + : base(4) + { + } + + [Benchmark(Baseline = true)] + public void Scalar() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromCmykBasic(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVector8() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromCmykVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromCmykAvx2(8).ConvertToRgba(values, this.output); + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs new file mode 100644 index 000000000..da8c51735 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs @@ -0,0 +1,64 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Numerics; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Memory; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + public abstract class ColorConversionBenchmark + { + private readonly int componentCount; + protected Buffer2D[] input; + protected Vector4[] output; + + protected ColorConversionBenchmark(int componentCount) + { + this.componentCount = componentCount; + } + + public const int Count = 128; + + [GlobalSetup] + public void Setup() + { + this.input = CreateRandomValues(this.componentCount, Count); + this.output = new Vector4[Count]; + } + + [GlobalCleanup] + public void Cleanup() + { + foreach (Buffer2D buffer in this.input) + { + buffer.Dispose(); + } + } + + private static Buffer2D[] CreateRandomValues( + int componentCount, + int inputBufferLength, + float minVal = 0f, + float maxVal = 255f) + { + var rnd = new Random(42); + var buffers = new Buffer2D[componentCount]; + for (int i = 0; i < componentCount; i++) + { + var values = new float[inputBufferLength]; + + for (int j = 0; j < inputBufferLength; j++) + { + values[j] = ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; + } + + // no need to dispose when buffer is not array owner + buffers[i] = Configuration.Default.MemoryAllocator.Allocate2D(values.Length, 1); + } + + return buffers; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs new file mode 100644 index 000000000..74b3e6db6 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs @@ -0,0 +1,33 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.ShortClr))] + public class GrayscaleColorConversion : ColorConversionBenchmark + { + public GrayscaleColorConversion() + : base(1) + { + } + + [Benchmark(Baseline = true)] + public void Scalar() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromGrayscaleBasic(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromGrayscaleAvx2(8).ConvertToRgba(values, this.output); + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs new file mode 100644 index 000000000..68dc0f04e --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs @@ -0,0 +1,41 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.ShortClr))] + public class RgbColorConversion : ColorConversionBenchmark + { + public RgbColorConversion() + : base(3) + { + } + + [Benchmark(Baseline = true)] + public void Scalar() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromRgbBasic(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVector8() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromRgbVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromRgbAvx2(8).ConvertToRgba(values, this.output); + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs index 7b47cf94a..94b28e4d9 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs @@ -1,39 +1,18 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. 
-using System; -using System.Numerics; - using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; -using SixLabors.ImageSharp.Memory; namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { [Config(typeof(Config.ShortClr))] - public class YCbCrColorConversion + public class YCbCrColorConversion : ColorConversionBenchmark { - private Buffer2D[] input; - - private Vector4[] output; - - public const int Count = 128; - - [GlobalSetup] - public void Setup() + public YCbCrColorConversion() + : base(3) { - this.input = CreateRandomValues(3, Count); - this.output = new Vector4[Count]; - } - - [GlobalCleanup] - public void Cleanup() - { - foreach (Buffer2D buffer in this.input) - { - buffer.Dispose(); - } } [Benchmark] @@ -41,15 +20,15 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { var values = new JpegColorConverter.ComponentValues(this.input, 0); - JpegColorConverter.FromYCbCrBasic.ConvertCore(values, this.output, 255F, 128F); + new JpegColorConverter.FromYCbCrBasic(8).ConvertToRgba(values, this.output); } [Benchmark(Baseline = true)] - public void SimdVector4() + public void SimdVector() { var values = new JpegColorConverter.ComponentValues(this.input, 0); - JpegColorConverter.FromYCbCrSimd.ConvertCore(values, this.output, 255F, 128F); + new JpegColorConverter.FromYCbCrVector4(8).ConvertToRgba(values, this.output); } [Benchmark] @@ -57,31 +36,15 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { var values = new JpegColorConverter.ComponentValues(this.input, 0); - JpegColorConverter.FromYCbCrSimdVector8.ConvertCore(values, this.output, 255F, 128F); + new JpegColorConverter.FromYCbCrVector8(8).ConvertToRgba(values, this.output); } - private static Buffer2D[] CreateRandomValues( - int componentCount, - int inputBufferLength, - float minVal = 0f, - float maxVal = 255f) + [Benchmark] + public void SimdVectorAvx2() { - var rnd = new Random(42); - var buffers = new Buffer2D[componentCount]; - for (int i = 
0; i < componentCount; i++) - { - var values = new float[inputBufferLength]; - - for (int j = 0; j < inputBufferLength; j++) - { - values[j] = ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; - } - - // no need to dispose when buffer is not array owner - buffers[i] = Configuration.Default.MemoryAllocator.Allocate2D(values.Length, 1); - } + var values = new JpegColorConverter.ComponentValues(this.input, 0); - return buffers; + new JpegColorConverter.FromYCbCrAvx2(8).ConvertToRgba(values, this.output); } } } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs new file mode 100644 index 000000000..ed8758131 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs @@ -0,0 +1,41 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.ShortClr))] + public class YccKColorConverter : ColorConversionBenchmark + { + public YccKColorConverter() + : base(4) + { + } + + [Benchmark(Baseline = true)] + public void Scalar() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromYccKBasic(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVector8() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromYccKVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromYccKAvx2(8).ConvertToRgba(values, this.output); + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs index da15da24c..04ca8cd65 100644 
--- a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs @@ -13,15 +13,13 @@ using System.Runtime.Intrinsics.X86; #endif using BenchmarkDotNet.Attributes; -using BenchmarkDotNet.Environments; -using BenchmarkDotNet.Jobs; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { - [Config(typeof(Config.ShortClr))] + [Config(typeof(Config.ShortCore31))] public abstract class FromVector4 where TPixel : unmanaged, IPixel { @@ -32,7 +30,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk protected Configuration Configuration => Configuration.Default; // [Params(64, 2048)] - [Params(1024)] + [Params(64, 256, 2048)] public int Count { get; set; } [GlobalSetup] @@ -60,7 +58,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk } } - [Benchmark] + [Benchmark(Baseline = true)] public void PixelOperations_Base() { new PixelOperations().FromVector4Destructive(this.Configuration, this.source.GetSpan(), this.destination.GetSpan()); @@ -93,7 +91,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk SimdUtils.BasicIntrinsics256.NormalizedFloatToByteSaturate(sBytes, dFloats); } - [Benchmark(Baseline = true)] + [Benchmark] public void ExtendedIntrinsic() { Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); @@ -104,12 +102,12 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk #if SUPPORTS_RUNTIME_INTRINSICS [Benchmark] - public void UseAvx2() + public void UseHwIntrinsics() { Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); - SimdUtils.Avx2Intrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats); + SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats); } private static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 
0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs new file mode 100644 index 000000000..5da6edc6b --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs @@ -0,0 +1,55 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortClr))] + public class FromVector4_Rgb24 : FromVector4 + { + } +} + +// 2020-11-02 +// ########## +// +// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1) +// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores +// .NET Core SDK=3.1.403 +// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT +// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT +// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// +// IterationCount=3 LaunchCount=1 WarmupCount=3 +// +// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:| +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 64 | 343.2 ns | 305.91 ns | 16.77 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 64 | 320.8 ns | 19.93 ns | 1.09 ns | 0.94 | 0.05 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 64 | 234.3 ns | 17.98 ns | 0.99 ns | 1.00 | 0.00 | 0.0052 | - | - | 24 B | +// | 
PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 64 | 246.0 ns | 82.34 ns | 4.51 ns | 1.05 | 0.02 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 64 | 222.3 ns | 39.46 ns | 2.16 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 64 | 243.4 ns | 33.58 ns | 1.84 ns | 1.09 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 256 | 824.9 ns | 32.77 ns | 1.80 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 256 | 967.0 ns | 39.09 ns | 2.14 ns | 1.17 | 0.01 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 256 | 756.9 ns | 94.43 ns | 5.18 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 256 | 1,003.3 ns | 3,192.09 ns | 174.97 ns | 1.32 | 0.22 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 256 | 748.6 ns | 248.03 ns | 13.60 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 256 | 437.0 ns | 36.48 ns | 2.00 ns | 0.58 | 0.01 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 2048 | 5,751.6 ns | 704.24 ns | 38.60 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 2048 | 4,391.6 ns | 718.17 ns | 39.37 ns | 0.76 | 0.00 | 0.0153 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 2048 | 6,202.0 ns | 1,815.18 ns | 99.50 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 2048 | 4,225.6 ns | 1,004.03 ns | 55.03 ns | 0.68 | 0.01 | 0.0153 | - | - | 72 B | +// | | | | | | | | | | | | | 
| +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 2048 | 6,157.1 ns | 2,516.98 ns | 137.96 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 2048 | 1,822.7 ns | 1,764.43 ns | 96.71 ns | 0.30 | 0.02 | 0.0172 | - | - | 72 B | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs new file mode 100644 index 000000000..4af028605 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -0,0 +1,87 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Pad3Shuffle4Channel + { + private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4(1, 0, 3, 2); + private static readonly XYZWPad3Shuffle4 ControlFast = default; + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count * 4 / 3]; + } + + [Params(96, 384, 768, 1536)] + public int Count { get; set; } + + [Benchmark] + public void Pad3Shuffle4() + { + SimdUtils.Pad3Shuffle4(this.source, this.destination, Control); + } + + [Benchmark] + public void Pad3Shuffle4FastFallback() + { + SimdUtils.Pad3Shuffle4(this.source, this.destination, ControlFast); + } + } + + // 2020-10-30 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. 
No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |------------------------- |------------------- |-------------------------------------------------- |------ |------------:|----------:|----------:|------------:|------:|--------:|------:|------:|------:|----------:| + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 120.64 ns | 7.190 ns | 21.200 ns | 114.26 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.63 ns | 0.175 ns | 0.155 ns | 23.65 ns | 0.15 | 0.01 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 25.25 ns | 0.356 ns | 0.298 ns | 25.27 ns | 0.17 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 14.80 ns | 0.358 ns | 1.032 ns | 14.64 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 96 | 24.84 ns | 0.376 ns | 0.333 ns | 24.74 ns | 1.57 | 0.06 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 96 | 24.58 ns | 0.471 ns | 0.704 ns | 24.38 ns | 1.60 | 0.09 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 258.92 ns | 4.873 ns | 4.069 ns | 257.95 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 41.41 ns | 0.859 ns | 1.204 ns | 41.33 ns | 0.16 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. 
SSE | COMPlus_EnableAVX=0 | 384 | 40.74 ns | 0.848 ns | 0.793 ns | 40.48 ns | 0.16 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 74.50 ns | 0.490 ns | 0.383 ns | 74.49 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 384 | 40.74 ns | 0.624 ns | 0.584 ns | 40.72 ns | 0.55 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 384 | 38.28 ns | 0.534 ns | 0.417 ns | 38.22 ns | 0.51 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 503.91 ns | 6.466 ns | 6.048 ns | 501.58 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 768 | 62.86 ns | 0.332 ns | 0.277 ns | 62.80 ns | 0.12 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 64.59 ns | 0.469 ns | 0.415 ns | 64.62 ns | 0.13 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 110.51 ns | 0.592 ns | 0.554 ns | 110.33 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 768 | 64.72 ns | 1.306 ns | 1.090 ns | 64.51 ns | 0.59 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.11 ns | 0.816 ns | 0.682 ns | 61.98 ns | 0.56 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 1,005.84 ns | 13.176 ns | 12.325 ns | 1,004.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 110.05 ns | 0.256 ns | 0.214 ns | 110.04 ns | 0.11 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. 
SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.545 ns | 0.483 ns | 110.09 ns | 0.11 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs new file mode 100644 index 000000000..2a886c687 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortCore31))] + public class PremultiplyVector4 + { + private static readonly Vector4[] Vectors = CreateVectors(); + + [Benchmark(Baseline = true)] + public void PremultiplyBaseline() + { + ref Vector4 baseRef = ref MemoryMarshal.GetReference(Vectors); + + for (int i = 0; i < Vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + Premultiply(ref v); + } + } + + [Benchmark] + public void Premultiply() + { + Vector4Utilities.Premultiply(Vectors); + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Premultiply(ref Vector4 source) + { + float w = source.W; + source *= w; + source.W = w; + } + + private static Vector4[] CreateVectors() + { + var rnd = new Random(42); + return GenerateRandomVectorArray(rnd, 2048, 0, 1); + 
} + + private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal) + { + var values = new Vector4[length]; + + for (int i = 0; i < length; i++) + { + ref Vector4 v = ref values[i]; + v.X = GetRandomFloat(rnd, minVal, maxVal); + v.Y = GetRandomFloat(rnd, minVal, maxVal); + v.Z = GetRandomFloat(rnd, minVal, maxVal); + v.W = GetRandomFloat(rnd, minVal, maxVal); + } + + return values; + } + + private static float GetRandomFloat(Random rnd, float minVal, float maxVal) + => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; + } +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs new file mode 100644 index 000000000..3667b973e --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs @@ -0,0 +1,64 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Shuffle3Channel + { + private static readonly DefaultShuffle3 Control = new DefaultShuffle3(1, 0, 2); + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count]; + } + + [Params(96, 384, 768, 1536)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle3() + { + SimdUtils.Shuffle3(this.source, this.destination, Control); + } + } + + // 2020-11-02 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. 
No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 48.46 ns | 1.034 ns | 2.438 ns | 47.46 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 96 | 32.42 ns | 0.537 ns | 0.476 ns | 32.34 ns | 0.66 | 0.04 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 32.51 ns | 0.373 ns | 0.349 ns | 32.56 ns | 0.66 | 0.03 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 199.04 ns | 1.512 ns | 1.180 ns | 199.17 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 384 | 71.20 ns | 2.654 ns | 7.784 ns | 69.60 ns | 0.41 | 0.02 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 63.23 ns | 0.569 ns | 0.505 ns | 63.21 ns | 0.32 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 391.28 ns | 5.087 ns | 3.972 ns | 391.22 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 768 | 109.12 ns | 2.149 ns | 2.010 ns | 108.66 ns | 0.28 | 0.01 | - | - | - | - | + // | Shuffle3 | 3. 
SSE | COMPlus_EnableAVX=0 | 768 | 106.51 ns | 0.734 ns | 0.613 ns | 106.56 ns | 0.27 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 1536 | 190.41 ns | 1.090 ns | 0.851 ns | 190.38 ns | 0.25 | 0.00 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs new file mode 100644 index 000000000..9cf24ccd6 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs @@ -0,0 +1,95 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Shuffle4Slice3Channel + { + private static readonly DefaultShuffle4Slice3 Control = new DefaultShuffle4Slice3(1, 0, 3, 2); + private static readonly XYZWShuffle4Slice3 ControlFast = default; + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[(int)(this.Count * (3 / 4F))]; + } + + [Params(128, 256, 512, 1024, 2048)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Slice3() + { + SimdUtils.Shuffle4Slice3(this.source, this.destination, Control); + } + + [Benchmark] + public void Shuffle4Slice3FastFallback() + { + SimdUtils.Shuffle4Slice3(this.source, this.destination, ControlFast); + } + } + + // 2020-10-29 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 
10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 56.44 ns | 2.843 ns | 8.382 ns | 56.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 128 | 27.15 ns | 0.556 ns | 0.762 ns | 27.34 ns | 0.41 | 0.03 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.36 ns | 0.321 ns | 0.268 ns | 26.26 ns | 0.38 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 25.85 ns | 0.494 ns | 0.462 ns | 25.84 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 128 | 26.15 ns | 0.113 ns | 0.106 ns | 26.16 ns | 1.01 | 0.02 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 128 | 25.57 ns | 0.078 ns | 0.061 ns | 25.56 ns | 0.99 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 97.47 ns | 0.327 ns | 0.289 ns | 97.35 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 256 | 32.61 ns | 0.107 ns | 0.095 ns | 32.62 ns | 0.33 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.21 ns | 0.169 ns | 0.150 ns | 33.15 ns | 0.34 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 52.34 ns | 0.779 ns | 0.729 ns | 51.94 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 256 | 32.16 ns | 0.111 ns | 0.104 ns | 32.16 ns | 0.61 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.61 ns | 0.342 ns | 0.319 ns | 33.62 ns | 0.64 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 210.74 ns | 3.825 ns | 5.956 ns | 207.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 512 | 51.03 ns | 0.535 ns | 0.501 ns | 51.18 ns | 0.24 | 0.01 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 66.60 ns | 1.313 ns | 1.613 ns | 65.93 ns | 0.31 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 119.12 ns | 1.905 ns | 1.689 ns | 118.52 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 512 | 50.33 ns | 0.382 ns | 0.339 ns | 50.41 ns | 0.42 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 512 | 49.25 ns | 0.555 ns | 0.492 ns | 49.26 ns | 0.41 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 423.55 ns | 4.891 ns | 4.336 ns | 423.27 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 1024 | 77.13 ns | 1.355 ns | 2.264 ns | 76.19 ns | 0.19 | 0.01 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 79.39 ns | 0.103 ns | 0.086 ns | 79.37 ns | 0.19 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 226.57 ns | 2.930 ns | 2.598 ns | 226.10 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 1024 | 80.25 ns | 1.647 ns | 2.082 ns | 80.98 ns | 0.35 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 84.99 ns | 1.234 ns | 1.155 ns | 85.60 ns | 0.38 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 794.96 ns | 1.735 ns | 1.538 ns | 795.15 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 128.41 ns | 0.417 ns | 0.390 ns | 128.24 ns | 0.16 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 127.24 ns | 0.294 ns | 0.229 ns | 127.23 ns | 0.16 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 2048 | 126.93 ns | 0.382 ns | 0.339 ns | 126.94 ns | 0.33 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. 
SSE | COMPlus_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs new file mode 100644 index 000000000..db4947001 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -0,0 +1,67 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class ShuffleByte4Channel + { + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count]; + } + + [Params(128, 256, 512, 1024, 2048)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Channel() + { + SimdUtils.Shuffle4(this.source, this.destination, default); + } + } + + // 2020-10-29 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. 
SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. 
AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs new file mode 100644 index 000000000..86b1f766e --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Tests; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class ShuffleFloat4Channel + { + private static readonly byte Control = default(WXYZShuffle4).Control; + private float[] source; + private float[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new Random(this.Count).GenerateRandomFloatArray(this.Count, 0, 256); + this.destination = new float[this.Count]; + } + + [Params(128, 256, 512, 1024, 2048)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Channel() + { + SimdUtils.Shuffle4(this.source, this.destination, Control); + } + } + + // 2020-10-29 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:| + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.647 ns | 0.5475 ns | 0.4853 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 128 | 9.818 ns | 0.1457 ns | 0.1292 ns | 0.15 | - | - | - | - | + // | Shuffle4Channel | 3. 
SSE | COMPlus_EnableAVX=0 | 128 | 15.267 ns | 0.1005 ns | 0.0940 ns | 0.24 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 125.586 ns | 1.9312 ns | 1.8064 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 256 | 15.878 ns | 0.1983 ns | 0.1758 ns | 0.13 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 29.170 ns | 0.2925 ns | 0.2442 ns | 0.23 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 263.859 ns | 2.6660 ns | 2.3634 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 512 | 29.452 ns | 0.3334 ns | 0.3118 ns | 0.11 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 52.912 ns | 0.1932 ns | 0.1713 ns | 0.20 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 495.717 ns | 1.9850 ns | 1.8567 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 1024 | 53.757 ns | 0.3212 ns | 0.2847 ns | 0.11 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 107.815 ns | 1.6201 ns | 1.3528 ns | 0.22 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 2048 | 105.120 ns | 0.6140 ns | 0.5443 ns | 0.11 | - | - | - | - | + // | Shuffle4Channel | 3. 
SSE | COMPlus_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs new file mode 100644 index 000000000..aecd41831 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs @@ -0,0 +1,65 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; + +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortClr))] + public class ToVector4_Rgb24 : ToVector4 + { + [Benchmark(Baseline = true)] + public void PixelOperations_Base() + { + new PixelOperations().ToVector4( + this.Configuration, + this.source.GetSpan(), + this.destination.GetSpan()); + } + } +} + +// 2020-11-02 +// ########## +// +// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1) +// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores +// .NET Core SDK=3.1.403 +// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT +// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT +// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// +// IterationCount=3 LaunchCount=1 WarmupCount=3 +// +// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:| +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 64 | 298.4 ns | 33.63 ns | 1.84 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 
4.7.2 | 64 | 355.5 ns | 908.51 ns | 49.80 ns | 1.19 | 0.17 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 64 | 220.1 ns | 13.77 ns | 0.75 ns | 1.00 | 0.00 | 0.0055 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 64 | 228.5 ns | 41.41 ns | 2.27 ns | 1.04 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 64 | 213.6 ns | 12.47 ns | 0.68 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 64 | 217.0 ns | 9.95 ns | 0.55 ns | 1.02 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 256 | 829.0 ns | 242.93 ns | 13.32 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 256 | 448.9 ns | 4.04 ns | 0.22 ns | 0.54 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 256 | 863.0 ns | 1,253.26 ns | 68.70 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 256 | 309.2 ns | 66.16 ns | 3.63 ns | 0.36 | 0.03 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 256 | 737.0 ns | 253.90 ns | 13.92 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 256 | 212.3 ns | 1.07 ns | 0.06 ns | 0.29 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 2048 | 5,625.6 ns | 404.35 ns | 22.16 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 2048 | 1,974.1 ns | 229.84 ns | 12.60 ns | 0.35 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 2048 | 5,467.2 ns | 537.29 ns | 29.45 ns | 
1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 2048 | 1,985.5 ns | 4,714.23 ns | 258.40 ns | 0.36 | 0.05 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 2048 | 5,888.2 ns | 1,622.23 ns | 88.92 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 2048 | 1,165.0 ns | 191.71 ns | 10.51 ns | 0.20 | 0.00 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs index 145bf9889..9ae3b073d 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs @@ -13,7 +13,7 @@ using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { - [Config(typeof(Config.ShortClr))] + [Config(typeof(Config.ShortCore31))] public class ToVector4_Rgba32 : ToVector4 { [Benchmark] @@ -52,6 +52,17 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(sBytes, dFloats); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Benchmark] + public void HwIntrinsics() + { + Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); + Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); + + SimdUtils.HwIntrinsics.ByteToNormalizedFloat(sBytes, dFloats); + } +#endif + // [Benchmark] public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_2Loops() { diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs new file mode 100644 index 000000000..1312c767b --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortCore31))] + public class UnPremultiplyVector4 + { + private static readonly Vector4[] Vectors = CreateVectors(); + + [Benchmark(Baseline = true)] + public void UnPremultiplyBaseline() + { + ref Vector4 baseRef = ref MemoryMarshal.GetReference(Vectors); + + for (int i = 0; i < Vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + UnPremultiply(ref v); + } + } + + [Benchmark] + public void UnPremultiply() + { + Vector4Utilities.UnPremultiply(Vectors); + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void UnPremultiply(ref Vector4 source) + { + float w = source.W; + source /= w; + source.W = w; + } + + private static Vector4[] CreateVectors() + { + var rnd = new Random(42); + return GenerateRandomVectorArray(rnd, 2048, 0, 1); + } + + private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal) + { + var values = new Vector4[length]; + + for (int i = 0; i < length; i++) + { + ref Vector4 v = ref values[i]; + v.X = GetRandomFloat(rnd, minVal, maxVal); + v.Y = GetRandomFloat(rnd, minVal, maxVal); + v.Z = GetRandomFloat(rnd, minVal, maxVal); + v.W = GetRandomFloat(rnd, minVal, maxVal); + } + + return values; + } + + private static float GetRandomFloat(Random rnd, float minVal, float maxVal) + => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; + } +} diff --git a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs new file mode 100644 index 000000000..5ceb4c8a0 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs @@ -0,0 +1,84 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif +using BenchmarkDotNet.Environments; +using BenchmarkDotNet.Jobs; + +namespace SixLabors.ImageSharp.Benchmarks +{ + public partial class Config + { + private const string On = "1"; + private const string Off = "0"; + + // See https://github.com/SixLabors/ImageSharp/pull/1229#discussion_r440477861 + // * EnableHWIntrinsic + // * EnableSSE + // * EnableSSE2 + // * EnableAES + // * EnablePCLMULQDQ + // * EnableSSE3 + // * EnableSSSE3 + // * EnableSSE41 + // * EnableSSE42 + // * EnablePOPCNT + // * EnableAVX + // * EnableFMA + // * EnableAVX2 + // * EnableBMI1 + // * EnableBMI2 + // * EnableLZCNT + // + // `FeatureSIMD` ends up impacting all SIMD support(including `System.Numerics`) but not things + // like `LZCNT`, `BMI1`, or `BMI2` + // `EnableSSE3_4` is a legacy switch that exists for compat and is basically the same as `EnableSSE3` + private const string EnableAES = "COMPlus_EnableAES"; + private const string EnableAVX = "COMPlus_EnableAVX"; + private const string EnableAVX2 = "COMPlus_EnableAVX2"; + private const string EnableBMI1 = "COMPlus_EnableBMI1"; + private const string EnableBMI2 = "COMPlus_EnableBMI2"; + private const string EnableFMA = "COMPlus_EnableFMA"; + private const string EnableHWIntrinsic = "COMPlus_EnableHWIntrinsic"; + private const string EnableLZCNT = "COMPlus_EnableLZCNT"; + private const string EnablePCLMULQDQ = "COMPlus_EnablePCLMULQDQ"; + private const string EnablePOPCNT = "COMPlus_EnablePOPCNT"; + private const string EnableSSE = "COMPlus_EnableSSE"; + private const string EnableSSE2 = "COMPlus_EnableSSE2"; + private const string EnableSSE3 = "COMPlus_EnableSSE3"; + private const string EnableSSE3_4 = "COMPlus_EnableSSE3_4"; + private const string EnableSSE41 = "COMPlus_EnableSSE41"; + private const string EnableSSE42 = "COMPlus_EnableSSE42"; + private const string EnableSSSE3 = "COMPlus_EnableSSSE3"; + private const string FeatureSIMD = 
"COMPlus_FeatureSIMD"; + + public class HwIntrinsics_SSE_AVX : Config + { + public HwIntrinsics_SSE_AVX() + { + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithEnvironmentVariables( + new EnvironmentVariable(EnableHWIntrinsic, Off), + new EnvironmentVariable(FeatureSIMD, Off)) + .WithId("1. No HwIntrinsics").AsBaseline()); + +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithId("2. AVX")); + } + + if (Sse.IsSupported) + { + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithEnvironmentVariables(new EnvironmentVariable(EnableAVX, Off)) + .WithId("3. SSE")); + } +#endif + } + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Config.cs b/tests/ImageSharp.Benchmarks/Config.cs index f9240779b..53271f522 100644 --- a/tests/ImageSharp.Benchmarks/Config.cs +++ b/tests/ImageSharp.Benchmarks/Config.cs @@ -12,7 +12,7 @@ using BenchmarkDotNet.Jobs; namespace SixLabors.ImageSharp.Benchmarks { - public class Config : ManualConfig + public partial class Config : ManualConfig { public Config() { diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs index 7d6c2efed..a933f890f 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs @@ -168,49 +168,27 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion [Benchmark] public void PixelConverter_Rgba32_ToArgb32() { - ref uint sBase = ref Unsafe.As(ref this.PermutedRunnerRgbaToArgb.Source[0]); - ref uint dBase = ref Unsafe.As(ref this.PermutedRunnerRgbaToArgb.Dest[0]); + Span source = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Source); + Span dest = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Dest); - for (int i = 0; i < this.Count; i++) 
- { - uint s = Unsafe.Add(ref sBase, i); - Unsafe.Add(ref dBase, i) = PixelConverter.FromRgba32.ToArgb32(s); - } - } - - [Benchmark] - public void PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer() - { - Span source = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Source); - Span dest = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Dest); - source.CopyTo(dest); - - ref uint dBase = ref MemoryMarshal.GetReference(dest); - - for (int i = 0; i < this.Count; i++) - { - uint s = Unsafe.Add(ref dBase, i); - Unsafe.Add(ref dBase, i) = PixelConverter.FromRgba32.ToArgb32(s); - } + PixelConverter.FromRgba32.ToArgb32(source, dest); } /* RESULTS: - Method | Count | Mean | Error | StdDev | Scaled | ScaledSD | - ---------------------------------------------------------- |------ |-----------:|-----------:|-----------:|-------:|---------:| - ByRef | 256 | 328.7 ns | 6.6141 ns | 6.1868 ns | 1.00 | 0.00 | - ByVal | 256 | 322.0 ns | 4.3541 ns | 4.0728 ns | 0.98 | 0.02 | - FromBytes | 256 | 321.5 ns | 3.3499 ns | 3.1335 ns | 0.98 | 0.02 | - InlineShuffle | 256 | 330.7 ns | 4.2525 ns | 3.9778 ns | 1.01 | 0.02 | - PixelConverter_Rgba32_ToArgb32 | 256 | 167.4 ns | 0.6357 ns | 0.5309 ns | 0.51 | 0.01 | - PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer | 256 | 196.6 ns | 0.8929 ns | 0.7915 ns | 0.60 | 0.01 | - | | | | | | | - ByRef | 2048 | 2,534.4 ns | 8.2947 ns | 6.9265 ns | 1.00 | 0.00 | - ByVal | 2048 | 2,638.5 ns | 52.6843 ns | 70.3320 ns | 1.04 | 0.03 | - FromBytes | 2048 | 2,517.2 ns | 40.8055 ns | 38.1695 ns | 0.99 | 0.01 | - InlineShuffle | 2048 | 2,546.5 ns | 21.2506 ns | 19.8778 ns | 1.00 | 0.01 | - PixelConverter_Rgba32_ToArgb32 | 2048 | 1,265.7 ns | 5.1397 ns | 4.5562 ns | 0.50 | 0.00 | - PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer | 2048 | 1,410.3 ns | 11.1939 ns | 9.9231 ns | 0.56 | 0.00 | - */ + | Method | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | + |------------------------------- |------ 
|------------:|----------:|----------:|------------:|------:|--------:| + | ByRef | 256 | 288.84 ns | 19.601 ns | 52.319 ns | 268.10 ns | 1.00 | 0.00 | + | ByVal | 256 | 267.97 ns | 1.831 ns | 1.713 ns | 267.85 ns | 0.77 | 0.18 | + | FromBytes | 256 | 266.81 ns | 2.427 ns | 2.270 ns | 266.47 ns | 0.76 | 0.18 | + | InlineShuffle | 256 | 291.41 ns | 5.820 ns | 5.444 ns | 290.17 ns | 0.83 | 0.19 | + | PixelConverter_Rgba32_ToArgb32 | 256 | 38.62 ns | 0.431 ns | 0.403 ns | 38.68 ns | 0.11 | 0.03 | + | | | | | | | | | + | ByRef | 2048 | 2,197.69 ns | 15.826 ns | 14.804 ns | 2,197.25 ns | 1.00 | 0.00 | + | ByVal | 2048 | 2,226.81 ns | 44.266 ns | 62.054 ns | 2,197.17 ns | 1.03 | 0.04 | + | FromBytes | 2048 | 2,181.35 ns | 18.033 ns | 16.868 ns | 2,185.97 ns | 0.99 | 0.01 | + | InlineShuffle | 2048 | 2,233.10 ns | 27.673 ns | 24.531 ns | 2,229.78 ns | 1.02 | 0.01 | + | PixelConverter_Rgba32_ToArgb32 | 2048 | 139.90 ns | 2.152 ns | 3.825 ns | 138.70 ns | 0.06 | 0.00 | + */ } } diff --git a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj index eaab162ff..e8ad660ad 100644 --- a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj +++ b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj @@ -17,6 +17,7 @@ + @@ -25,7 +26,7 @@ - + diff --git a/tests/ImageSharp.Benchmarks/Processing/HistogramEqualization.cs b/tests/ImageSharp.Benchmarks/Processing/HistogramEqualization.cs new file mode 100644 index 000000000..081d3e8e3 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Processing/HistogramEqualization.cs @@ -0,0 +1,53 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System.IO; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats; +using SixLabors.ImageSharp.Processing; +using SixLabors.ImageSharp.Processing.Processors.Normalization; +using SixLabors.ImageSharp.Tests; + +namespace SixLabors.ImageSharp.Benchmarks.Processing +{ + [Config(typeof(Config.ShortClr))] + public class HistogramEqualization : BenchmarkBase + { + private Image image; + + [GlobalSetup] + public void ReadImages() + { + if (this.image == null) + { + this.image = Image.Load(File.OpenRead(Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, TestImages.Jpeg.Baseline.HistogramEqImage))); + } + } + + [GlobalCleanup] + public void Cleanup() + { + this.image.Dispose(); + } + + [Benchmark(Description = "Global Histogram Equalization")] + public void GlobalHistogramEqualization() + { + this.image.Mutate(img => img.HistogramEqualization(new HistogramEqualizationOptions() + { + LuminanceLevels = 256, + Method = HistogramEqualizationMethod.Global + })); + } + + [Benchmark(Description = "AdaptiveHistogramEqualization (Tile interpolation)")] + public void AdaptiveHistogramEqualization() + { + this.image.Mutate(img => img.HistogramEqualization(new HistogramEqualizationOptions() + { + LuminanceLevels = 256, + Method = HistogramEqualizationMethod.AdaptiveTileInterpolation + })); + } + } +} diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs new file mode 100644 index 000000000..f1bfaa4ad --- /dev/null +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -0,0 +1,399 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using SixLabors.ImageSharp.Tests.TestUtilities; +using Xunit; + +namespace SixLabors.ImageSharp.Tests.Common +{ + public partial class SimdUtilsTests + { + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffleFloat4Channel(int count) + { + static void RunTest(string serialized) + { + // No need to test multiple shuffle controls as the + // pipeline is always the same. + int size = FeatureTestRunner.Deserialize(serialized); + byte control = default(WZYXShuffle4).Control; + + TestShuffleFloat4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, control), + control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffleByte4Channel(int count) + { + static void RunTest(string serialized) + { + int size = FeatureTestRunner.Deserialize(serialized); + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IShuffle4 to the generic utils method. 
+ WXYZShuffle4 wxyz = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz), + wxyz.Control); + + WZYXShuffle4 wzyx = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx), + wzyx.Control); + + YZWXShuffle4 yzwx = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx), + yzwx.Control); + + ZYXWShuffle4 zyxw = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), + zyxw.Control); + + var xwyz = new DefaultShuffle4(2, 1, 3, 0); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultShuffle4(1, 1, 1, 1); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultShuffle4(3, 3, 3, 3); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), + wwww.Control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy3))] + public void BulkShuffleByte3Channel(int count) + { + static void RunTest(string serialized) + { + int size = FeatureTestRunner.Deserialize(serialized); + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IShuffle3 to the generic utils method. 
+ var zyx = new DefaultShuffle3(0, 1, 2); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx), + zyx.Control); + + var xyz = new DefaultShuffle3(2, 1, 0); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, xyz), + xyz.Control); + + var yyy = new DefaultShuffle3(1, 1, 1); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, yyy), + yyy.Control); + + var zzz = new DefaultShuffle3(2, 2, 2); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zzz), + zzz.Control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy3))] + public void BulkPad3Shuffle4Channel(int count) + { + static void RunTest(string serialized) + { + int size = FeatureTestRunner.Deserialize(serialized); + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IPad3Shuffle4 to the generic utils method. 
+ XYZWPad3Shuffle4 xyzw = default; + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xyzw), + xyzw.Control); + + var xwyz = new DefaultPad3Shuffle4(2, 1, 3, 0); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultPad3Shuffle4(1, 1, 1, 1); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultPad3Shuffle4(3, 3, 3, 3); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, wwww), + wwww.Control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffle4Slice3Channel(int count) + { + static void RunTest(string serialized) + { + int size = FeatureTestRunner.Deserialize(serialized); + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IShuffle4Slice3 to the generic utils method. 
+ XYZWShuffle4Slice3 xyzw = default; + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xyzw), + xyzw.Control); + + var xwyz = new DefaultShuffle4Slice3(2, 1, 3, 0); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultShuffle4Slice3(1, 1, 1, 1); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultShuffle4Slice3(3, 3, 3, 3); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, wwww), + wwww.Control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); + } + + private static void TestShuffleFloat4Channel( + int count, + Action, Memory> convert, + byte control) + { + float[] source = new Random(count).GenerateRandomFloatArray(count, 0, 256); + var result = new float[count]; + + float[] expected = new float[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 4) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + i]; + expected[i + 3] = source[p3 + i]; + } + + convert(source, result); + + Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5F)); + } + + private static void TestShuffleByte4Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + var result = new byte[count]; + + byte[] expected = new byte[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 4) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + 
i]; + expected[i + 3] = source[p3 + i]; + } + + convert(source, result); + + Assert.Equal(expected, result); + } + + private static void TestShuffleByte3Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + var result = new byte[count]; + + byte[] expected = new byte[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int _, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 3) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + i]; + } + + convert(source, result); + + Assert.Equal(expected, result); + } + + private static void TestPad3Shuffle4Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + + var result = new byte[count * 4 / 3]; + + byte[] expected = new byte[result.Length]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3) + { + expected[p0 + i] = source[j]; + expected[p1 + i] = source[j + 1]; + expected[p2 + i] = source[j + 2]; + expected[p3 + i] = byte.MaxValue; + } + + Span temp = stackalloc byte[4]; + for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3) + { + temp[0] = source[j]; + temp[1] = source[j + 1]; + temp[2] = source[j + 2]; + temp[3] = byte.MaxValue; + + expected[i] = temp[p0]; + expected[i + 1] = temp[p1]; + expected[i + 2] = temp[p2]; + expected[i + 3] = temp[p3]; + } + + convert(source, result); + + for (int i = 0; i < expected.Length; i++) + { + Assert.Equal(expected[i], result[i]); + } + + Assert.Equal(expected, result); + } + + private static void TestShuffle4Slice3Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + + var result = new 
byte[count * 3 / 4]; + + byte[] expected = new byte[result.Length]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int _, + out int p2, + out int p1, + out int p0); + + for (int i = 0, j = 0; i < expected.Length; i += 3, j += 4) + { + expected[i] = source[p0 + j]; + expected[i + 1] = source[p1 + j]; + expected[i + 2] = source[p2 + j]; + } + + convert(source, result); + + for (int i = 0; i < expected.Length; i++) + { + Assert.Equal(expected[i], result[i]); + } + + Assert.Equal(expected, result); + } + } +} diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 6dce48935..ec09e43e5 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -7,13 +7,13 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.Common.Tuples; - +using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; namespace SixLabors.ImageSharp.Tests.Common { - public class SimdUtilsTests + public partial class SimdUtilsTests { private ITestOutputHelper Output { get; } @@ -163,7 +163,7 @@ namespace SixLabors.ImageSharp.Tests.Common public static readonly TheoryData ArraySizesDivisibleBy8 = new TheoryData { 0, 8, 16, 1024 }; public static readonly TheoryData ArraySizesDivisibleBy4 = new TheoryData { 0, 4, 8, 28, 1020 }; - + public static readonly TheoryData ArraySizesDivisibleBy3 = new TheoryData { 0, 3, 9, 36, 957 }; public static readonly TheoryData ArraySizesDivisibleBy32 = new TheoryData { 0, 32, 512 }; public static readonly TheoryData ArbitraryArraySizes = @@ -204,6 +204,25 @@ namespace SixLabors.ImageSharp.Tests.Common (s, d) => SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy32))] + public void HwIntrinsics_BulkConvertByteToNormalizedFloat(int count) + { + 
static void RunTest(string serialized) + { + TestImpl_BulkConvertByteToNormalizedFloat( + FeatureTestRunner.Deserialize(serialized), + (s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41); + } +#endif + [Theory] [MemberData(nameof(ArbitraryArraySizes))] public void BulkConvertByteToNormalizedFloat(int count) @@ -281,16 +300,19 @@ namespace SixLabors.ImageSharp.Tests.Common [Theory] [MemberData(nameof(ArraySizesDivisibleBy32))] - public void Avx2_BulkConvertNormalizedFloatToByteClampOverflows(int count) + public void HwIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count) { - if (!System.Runtime.Intrinsics.X86.Avx2.IsSupported) + static void RunTest(string serialized) { - return; + TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( + FeatureTestRunner.Deserialize(serialized), + (s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); } - TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, count, - (s, d) => SimdUtils.Avx2Intrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2); } #endif diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index 3f767620a..f98fa3c7f 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -39,22 +39,32 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp }; [Theory] - [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32, false)] - [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32, true)] - public void BmpDecoder_CanDecode_MiscellaneousBitmaps(TestImageProvider provider, bool enforceDiscontiguousBuffers) + [WithFileCollection(nameof(MiscBmpFiles), 
PixelTypes.Rgba32)] + public void BmpDecoder_CanDecode_MiscellaneousBitmaps(TestImageProvider provider) where TPixel : unmanaged, IPixel + { + using Image image = provider.GetImage(BmpDecoder); + image.DebugSave(provider); + + if (TestEnvironment.IsWindows) + { + image.CompareToOriginal(provider); + } + } + + [Theory] + [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32)] + public void BmpDecoder_CanDecode_MiscellaneousBitmaps_WithLimitedAllocatorBufferCapacity( + TestImageProvider provider) { static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); - if (!string.IsNullOrEmpty(nonContiguousBuffersStr)) - { - provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - } + provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(BmpDecoder); - image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr); + using Image image = provider.GetImage(BmpDecoder); + image.DebugSave(provider, nonContiguousBuffersStr); if (TestEnvironment.IsWindows) { @@ -66,7 +76,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp RemoteExecutor.Invoke( RunTest, providerDump, - enforceDiscontiguousBuffers ? "Disco" : string.Empty) + "Disco") .Dispose(); } @@ -348,7 +358,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp using (Image image = provider.GetImage(BmpDecoder)) { image.DebugSave(provider); - image.CompareToOriginal(provider); + + // Do not validate. Reference files will fail validation. 
+ image.CompareToOriginal(provider, new MagickReferenceDecoder(false)); } } diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs index b05486e35..83b67a01a 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs @@ -10,7 +10,7 @@ using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing; using SixLabors.ImageSharp.Processing.Processors.Quantization; using SixLabors.ImageSharp.Tests.TestUtilities.ImageComparison; - +using SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs; using Xunit; using Xunit.Abstractions; @@ -200,10 +200,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp Quantizer = new WuQuantizer() }; string actualOutputFile = provider.Utility.SaveTestOutputFile(image, "bmp", encoder, appendPixelTypeToFileName: false); + + // Use the default decoder to test our encoded image. This verifies the content. + // We do not verify the reference image though as some are invalid. IImageDecoder referenceDecoder = TestEnvironment.GetReferenceDecoder(actualOutputFile); using (var referenceImage = Image.Load(actualOutputFile, referenceDecoder)) { - referenceImage.CompareToReferenceOutput(ImageComparer.TolerantPercentage(0.01f), provider, extension: "bmp", appendPixelTypeToFileName: false); + referenceImage.CompareToReferenceOutput( + ImageComparer.TolerantPercentage(0.01f), + provider, + extension: "bmp", + appendPixelTypeToFileName: false, + decoder: new MagickReferenceDecoder(false)); } } } @@ -226,10 +234,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp Quantizer = new OctreeQuantizer() }; string actualOutputFile = provider.Utility.SaveTestOutputFile(image, "bmp", encoder, appendPixelTypeToFileName: false); + + // Use the default decoder to test our encoded image. This verifies the content. + // We do not verify the reference image though as some are invalid. 
IImageDecoder referenceDecoder = TestEnvironment.GetReferenceDecoder(actualOutputFile); using (var referenceImage = Image.Load(actualOutputFile, referenceDecoder)) { - referenceImage.CompareToReferenceOutput(ImageComparer.TolerantPercentage(0.01f), provider, extension: "bmp", appendPixelTypeToFileName: false); + referenceImage.CompareToReferenceOutput( + ImageComparer.TolerantPercentage(0.01f), + provider, + extension: "bmp", + appendPixelTypeToFileName: false, + decoder: new MagickReferenceDecoder(false)); } } } diff --git a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs index 63aae5c55..eb2643b8c 100644 --- a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs @@ -198,17 +198,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Gif [Theory] [WithFile(TestImages.Gif.Giphy, PixelTypes.Rgba32)] [WithFile(TestImages.Gif.Kumin, PixelTypes.Rgba32)] - public void GifDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void GifDecoder_CanDecode_WithLimitedAllocatorBufferCapacity( + TestImageProvider provider) { static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider + = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(GifDecoder); - image.DebugSave(provider); + using Image image = provider.GetImage(GifDecoder); + image.DebugSave(provider, nonContiguousBuffersStr); image.CompareToOriginal(provider); } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 722521f98..927d7c252 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ 
b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -8,7 +8,7 @@ using System.Diagnostics; using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; - +using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; @@ -45,20 +45,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => + { + var block = default(Block8x8F); + + for (int i = 0; i < Block8x8F.Size; i++) { - var block = default(Block8x8F); - - for (int i = 0; i < Block8x8F.Size; i++) - { - block[i] = i; - } - - sum = 0; - for (int i = 0; i < Block8x8F.Size; i++) - { - sum += block[i]; - } - }); + block[i] = i; + } + + sum = 0; + for (int i = 0; i < Block8x8F.Size; i++) + { + sum += block[i]; + } + }); Assert.Equal(sum, 64f * 63f * 0.5f); } @@ -70,20 +70,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => + { + // Block8x8F block = new Block8x8F(); + float[] block = new float[64]; + for (int i = 0; i < Block8x8F.Size; i++) { - // Block8x8F block = new Block8x8F(); - float[] block = new float[64]; - for (int i = 0; i < Block8x8F.Size; i++) - { - block[i] = i; - } - - sum = 0; - for (int i = 0; i < Block8x8F.Size; i++) - { - sum += block[i]; - } - }); + block[i] = i; + } + + sum = 0; + for (int i = 0; i < Block8x8F.Size; i++) + { + sum += block[i]; + } + }); Assert.Equal(sum, 64f * 63f * 0.5f); } @@ -101,11 +101,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => - { - var b = default(Block8x8F); - b.LoadFrom(data); - b.ScaledCopyTo(mirror); - }); + { + var b = default(Block8x8F); + b.LoadFrom(data); + b.ScaledCopyTo(mirror); + }); Assert.Equal(data, mirror); @@ -126,11 +126,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => - { - var b = default(Block8x8F); - Block8x8F.LoadFrom(&b, data); - Block8x8F.ScaledCopyTo(&b, mirror); - }); + { + var b = default(Block8x8F); + Block8x8F.LoadFrom(&b, data); + 
Block8x8F.ScaledCopyTo(&b, mirror); + }); Assert.Equal(data, mirror); @@ -151,11 +151,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => - { - var v = default(Block8x8F); - v.LoadFrom(data); - v.ScaledCopyTo(mirror); - }); + { + var v = default(Block8x8F); + v.LoadFrom(data); + v.ScaledCopyTo(mirror); + }); Assert.Equal(data, mirror); @@ -165,19 +165,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Fact] public void TransposeInto() { - float[] expected = Create8x8FloatData(); - ReferenceImplementations.Transpose8x8(expected); + static void RunTest() + { + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); - var source = default(Block8x8F); - source.LoadFrom(Create8x8FloatData()); + var source = default(Block8x8F); + source.LoadFrom(Create8x8FloatData()); - var dest = default(Block8x8F); - source.TransposeInto(ref dest); + var dest = default(Block8x8F); + source.TransposeInto(ref dest); - float[] actual = new float[64]; - dest.ScaledCopyTo(actual); + float[] actual = new float[64]; + dest.ScaledCopyTo(actual); - Assert.Equal(expected, actual); + Assert.Equal(expected, actual); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); } private class BufferHolder @@ -228,7 +235,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.PrintLinearData(input); Block8x8F dest = block; - dest.NormalizeColorsInplace(255); + dest.NormalizeColorsInPlace(255); float[] array = new float[64]; dest.ScaledCopyTo(array); @@ -253,11 +260,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Block8x8F source = CreateRandomFloatBlock(-200, 200, seed); Block8x8F expected = source; - expected.NormalizeColorsInplace(255); - expected.RoundInplace(); + expected.NormalizeColorsInPlace(255); + expected.RoundInPlace(); Block8x8F actual = source; - actual.NormalizeColorsAndRoundInplaceVector8(255); + actual.NormalizeColorsAndRoundInPlaceVector8(255); 
this.Output.WriteLine(expected.ToString()); this.Output.WriteLine(actual.ToString()); @@ -318,12 +325,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [InlineData(1)] [InlineData(2)] [InlineData(3)] - public void RoundInplaceSlow(int seed) + public void RoundInPlaceSlow(int seed) { Block8x8F s = CreateRandomFloatBlock(-500, 500, seed); Block8x8F d = s; - d.RoundInplace(); + d.RoundInPlace(); this.Output.WriteLine(s.ToString()); this.Output.WriteLine(d.ToString()); @@ -338,19 +345,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } [Fact] - public void MultiplyInplace_ByOtherBlock() + public void MultiplyInPlace_ByOtherBlock() { - Block8x8F original = CreateRandomFloatBlock(-500, 500, 42); - Block8x8F m = CreateRandomFloatBlock(-500, 500, 42); + static void RunTest() + { + Block8x8F original = CreateRandomFloatBlock(-500, 500, 42); + Block8x8F m = CreateRandomFloatBlock(-500, 500, 42); - Block8x8F actual = original; + Block8x8F actual = original; - actual.MultiplyInplace(ref m); + actual.MultiplyInPlace(ref m); - for (int i = 0; i < Block8x8F.Size; i++) - { - Assert.Equal(original[i] * m[i], actual[i]); + for (int i = 0; i < Block8x8F.Size; i++) + { + Assert.Equal(original[i] * m[i], actual[i]); + } } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); } [Theory] @@ -390,23 +404,51 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg ReferenceImplementations.DequantizeBlock(&expected, &qt, unzig.Data); - actual.MultiplyInplace(ref zigQt); + actual.MultiplyInPlace(ref zigQt); this.CompareBlocks(expected, actual, 0); } [Fact] - public void MultiplyInplace_ByScalar() + public void AddToAllInPlace() { - Block8x8F original = CreateRandomFloatBlock(-500, 500); + static void RunTest() + { + Block8x8F original = CreateRandomFloatBlock(-500, 500); - Block8x8F actual = original; - actual.MultiplyInplace(42f); + Block8x8F actual = original; + actual.AddInPlace(42f); - for (int i = 0; i < 64; i++) + for (int 
i = 0; i < 64; i++) + { + Assert.Equal(original[i] + 42f, actual[i]); + } + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); + } + + [Fact] + public void MultiplyInPlace_ByScalar() + { + static void RunTest() { - Assert.Equal(original[i] * 42f, actual[i]); + Block8x8F original = CreateRandomFloatBlock(-500, 500); + + Block8x8F actual = original; + actual.MultiplyInPlace(42f); + + for (int i = 0; i < 64; i++) + { + Assert.Equal(original[i] * 42f, actual[i]); + } } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); } [Fact] diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 860f9c396..68210caea 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -22,6 +22,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg private static readonly ApproximateColorSpaceComparer ColorSpaceComparer = new ApproximateColorSpaceComparer(Precision); + // int inputBufferLength, int resultBufferLength, int seed public static readonly TheoryData CommonConversionData = new TheoryData { @@ -41,9 +42,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromYCbCrBasic(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCrBasic(int inputBufferLength, int resultBufferLength, int seed) { - ValidateRgbToYCbCrConversion( + ValidateConversion( new JpegColorConverter.FromYCbCrBasic(8), 3, inputBufferLength, @@ -51,44 +52,36 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg seed); } - private static void ValidateYCbCr(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) - { - float y = values.Component0[i]; - float cb = values.Component1[i]; - float cr = values.Component2[i]; 
- var ycbcr = new YCbCr(y, cb, cr); - - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = ColorSpaceConverter.ToRgb(ycbcr); - - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); - } - [Theory] - [InlineData(64, 1)] - [InlineData(16, 2)] - [InlineData(8, 3)] - public void FromYCbCrSimd_ConvertCore(int size, int seed) + [MemberData(nameof(CommonConversionData))] + public void FromYCbCrVector4(int inputBufferLength, int resultBufferLength, int seed) { - JpegColorConverter.ComponentValues values = CreateRandomValues(3, size, seed); - var result = new Vector4[size]; - - JpegColorConverter.FromYCbCrSimd.ConvertCore(values, result, 255, 128); - - for (int i = 0; i < size; i++) + if (!SimdUtils.HasVector4) { - ValidateYCbCr(values, result, i); + this.Output.WriteLine("No SSE present, skipping test!"); + return; } + + ValidateConversion( + new JpegColorConverter.FromYCbCrVector4(8), + 3, + inputBufferLength, + resultBufferLength, + seed); } [Theory] [MemberData(nameof(CommonConversionData))] - public void FromYCbCrSimd(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCrVector8(int inputBufferLength, int resultBufferLength, int seed) { - ValidateRgbToYCbCrConversion( - new JpegColorConverter.FromYCbCrSimd(8), + if (!SimdUtils.HasVector8) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } + + ValidateConversion( + new JpegColorConverter.FromYCbCrVector8(8), 3, inputBufferLength, resultBufferLength, @@ -97,17 +90,16 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Theory] [MemberData(nameof(CommonConversionData))] - public void FromYCbCrSimdAvx2(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCrAvx2(int inputBufferLength, int resultBufferLength, int seed) { - if (!SimdUtils.HasVector8) + if (!SimdUtils.HasAvx2) { this.Output.WriteLine("No AVX2 present, skipping test!"); return; } - // 
JpegColorConverter.FromYCbCrSimdAvx2.LogPlz = s => this.Output.WriteLine(s); - ValidateRgbToYCbCrConversion( - new JpegColorConverter.FromYCbCrSimdVector8(8), + ValidateConversion( + new JpegColorConverter.FromYCbCrAvx2(8), 3, inputBufferLength, resultBufferLength, @@ -116,7 +108,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromYCbCr_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCr_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) { ValidateConversion( JpegColorSpace.YCbCr, @@ -126,149 +118,251 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg seed); } - // Benchmark, for local execution only - // [Theory] - // [InlineData(false)] - // [InlineData(true)] - public void BenchmarkYCbCr(bool simd) + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromCmykBasic(int inputBufferLength, int resultBufferLength, int seed) { - int count = 2053; - int times = 50000; - - JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); - var result = new Vector4[count]; + ValidateConversion( + new JpegColorConverter.FromCmykBasic(8), + 4, + inputBufferLength, + resultBufferLength, + seed); + } - JpegColorConverter converter = simd ? 
(JpegColorConverter)new JpegColorConverter.FromYCbCrSimd(8) : new JpegColorConverter.FromYCbCrBasic(8); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromCmykVector8(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasVector8) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - // Warm up: - converter.ConvertToRgba(values, result); + ValidateConversion( + new JpegColorConverter.FromCmykVector8(8), + 4, + inputBufferLength, + resultBufferLength, + seed); + } - using (new MeasureGuard(this.Output, $"{converter.GetType().Name} x {times}")) + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromCmykAvx2(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasAvx2) { - for (int i = 0; i < times; i++) - { - converter.ConvertToRgba(values, result); - } + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; } + + ValidateConversion( + new JpegColorConverter.FromCmykAvx2(8), + 4, + inputBufferLength, + resultBufferLength, + seed); } [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromCmyk(int inputBufferLength, int resultBufferLength, int seed) + public void FromCmyk_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) { - var v = new Vector4(0, 0, 0, 1F); - var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F); - - var converter = JpegColorConverter.GetConverter(JpegColorSpace.Cmyk, 8); - JpegColorConverter.ComponentValues values = CreateRandomValues(4, inputBufferLength, seed); - var result = new Vector4[resultBufferLength]; + ValidateConversion( + JpegColorSpace.Cmyk, + 4, + inputBufferLength, + resultBufferLength, + seed); + } - converter.ConvertToRgba(values, result); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromGrayscaleBasic(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + new 
JpegColorConverter.FromGrayscaleBasic(8), + 1, + inputBufferLength, + resultBufferLength, + seed); + } - for (int i = 0; i < resultBufferLength; i++) + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromGrayscaleAvx2(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasAvx2) { - float c = values.Component0[i]; - float m = values.Component1[i]; - float y = values.Component2[i]; - float k = values.Component3[i] / 255F; + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - v.X = c * k; - v.Y = m * k; - v.Z = y * k; - v.W = 1F; + ValidateConversion( + new JpegColorConverter.FromGrayscaleAvx2(8), + 1, + inputBufferLength, + resultBufferLength, + seed); + } - v *= scale; + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromGraysacle_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + JpegColorSpace.Grayscale, + 1, + inputBufferLength, + resultBufferLength, + seed); + } - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = new Rgb(v.X, v.Y, v.Z); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromRgbBasic(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + new JpegColorConverter.FromRgbBasic(8), + 3, + inputBufferLength, + resultBufferLength, + seed); + } - Assert.Equal(expected, actual); - Assert.Equal(1, rgba.W); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromRgbVector8(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasVector8) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; } + + ValidateConversion( + new JpegColorConverter.FromRgbVector8(8), + 3, + inputBufferLength, + resultBufferLength, + seed); } [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromGrayScale(int inputBufferLength, int resultBufferLength, int seed) + public 
void FromRgbAvx2(int inputBufferLength, int resultBufferLength, int seed) { - var converter = JpegColorConverter.GetConverter(JpegColorSpace.Grayscale, 8); - JpegColorConverter.ComponentValues values = CreateRandomValues(1, inputBufferLength, seed); - var result = new Vector4[resultBufferLength]; + if (!SimdUtils.HasAvx2) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - converter.ConvertToRgba(values, result); + ValidateConversion( + new JpegColorConverter.FromRgbAvx2(8), + 3, + inputBufferLength, + resultBufferLength, + seed); + } - for (int i = 0; i < resultBufferLength; i++) - { - float y = values.Component0[i]; - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = new Rgb(y / 255F, y / 255F, y / 255F); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromRgb_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + JpegColorSpace.RGB, + 3, + inputBufferLength, + resultBufferLength, + seed); + } - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); - } + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromYccKBasic(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + new JpegColorConverter.FromYccKBasic(8), + 4, + inputBufferLength, + resultBufferLength, + seed); } [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromRgb(int inputBufferLength, int resultBufferLength, int seed) + public void FromYccKVector8(int inputBufferLength, int resultBufferLength, int seed) { - var converter = JpegColorConverter.GetConverter(JpegColorSpace.RGB, 8); - JpegColorConverter.ComponentValues values = CreateRandomValues(3, inputBufferLength, seed); - var result = new Vector4[resultBufferLength]; + if (!SimdUtils.HasVector8) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - converter.ConvertToRgba(values, result); + 
ValidateConversion( + new JpegColorConverter.FromYccKVector8(8), + 4, + inputBufferLength, + resultBufferLength, + seed); + } - for (int i = 0; i < resultBufferLength; i++) + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromYccKAvx2(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasAvx2) { - float r = values.Component0[i]; - float g = values.Component1[i]; - float b = values.Component2[i]; - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = new Rgb(r / 255F, g / 255F, b / 255F); - - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; } + + ValidateConversion( + new JpegColorConverter.FromYccKAvx2(8), + 4, + inputBufferLength, + resultBufferLength, + seed); } [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromYcck(int inputBufferLength, int resultBufferLength, int seed) + public void FromYcck_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) { - var v = new Vector4(0, 0, 0, 1F); - var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F); + ValidateConversion( + JpegColorSpace.Ycck, + 4, + inputBufferLength, + resultBufferLength, + seed); + } - var converter = JpegColorConverter.GetConverter(JpegColorSpace.Ycck, 8); - JpegColorConverter.ComponentValues values = CreateRandomValues(4, inputBufferLength, seed); - var result = new Vector4[resultBufferLength]; + // Benchmark, for local execution only + // [Theory] + // [InlineData(false)] + // [InlineData(true)] + public void BenchmarkYCbCr(bool simd) + { + int count = 2053; + int times = 50000; + + JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); + var result = new Vector4[count]; + JpegColorConverter converter = simd ? 
(JpegColorConverter)new JpegColorConverter.FromYCbCrVector4(8) : new JpegColorConverter.FromYCbCrBasic(8); + + // Warm up: converter.ConvertToRgba(values, result); - for (int i = 0; i < resultBufferLength; i++) + using (new MeasureGuard(this.Output, $"{converter.GetType().Name} x {times}")) { - float y = values.Component0[i]; - float cb = values.Component1[i] - 128F; - float cr = values.Component2[i] - 128F; - float k = values.Component3[i] / 255F; - - v.X = (255F - (float)Math.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; - v.Y = (255F - (float)Math.Round( - y - (0.344136F * cb) - (0.714136F * cr), - MidpointRounding.AwayFromZero)) * k; - v.Z = (255F - (float)Math.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; - v.W = 1F; - - v *= scale; - - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = new Rgb(v.X, v.Y, v.Z); - - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); + for (int i = 0; i < times; i++) + { + converter.ConvertToRgba(values, result); + } } } @@ -283,7 +377,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg var buffers = new Buffer2D[componentCount]; for (int i = 0; i < componentCount; i++) { - float[] values = new float[inputBufferLength]; + var values = new float[inputBufferLength]; for (int j = 0; j < inputBufferLength; j++) { @@ -306,7 +400,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg int resultBufferLength, int seed) { - ValidateRgbToYCbCrConversion( + ValidateConversion( JpegColorConverter.GetConverter(colorSpace, 8), componentCount, inputBufferLength, @@ -314,7 +408,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg seed); } - private static void ValidateRgbToYCbCrConversion( + private static void ValidateConversion( JpegColorConverter converter, int componentCount, int inputBufferLength, @@ -328,8 +422,128 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg for (int i = 0; i < resultBufferLength; i++) { - ValidateYCbCr(values, 
result, i); + Validate(converter.ColorSpace, values, result, i); } } + + private static void Validate( + JpegColorSpace colorSpace, + in JpegColorConverter.ComponentValues values, + Vector4[] result, + int i) + { + switch (colorSpace) + { + case JpegColorSpace.Grayscale: + ValidateGrayScale(values, result, i); + break; + case JpegColorSpace.Ycck: + ValidateCyyK(values, result, i); + break; + case JpegColorSpace.Cmyk: + ValidateCmyk(values, result, i); + break; + case JpegColorSpace.RGB: + ValidateRgb(values, result, i); + break; + case JpegColorSpace.YCbCr: + ValidateYCbCr(values, result, i); + break; + default: + Assert.True(false, $"Colorspace {colorSpace} not supported!"); + break; + } + } + + private static void ValidateYCbCr(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + float y = values.Component0[i]; + float cb = values.Component1[i]; + float cr = values.Component2[i]; + var ycbcr = new YCbCr(y, cb, cr); + + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = ColorSpaceConverter.ToRgb(ycbcr); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } + + private static void ValidateCyyK(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + var v = new Vector4(0, 0, 0, 1F); + var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F); + + float y = values.Component0[i]; + float cb = values.Component1[i] - 128F; + float cr = values.Component2[i] - 128F; + float k = values.Component3[i] / 255F; + + v.X = (255F - (float)Math.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; + v.Y = (255F - (float)Math.Round( + y - (0.344136F * cb) - (0.714136F * cr), + MidpointRounding.AwayFromZero)) * k; + v.Z = (255F - (float)Math.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; + v.W = 1F; + + v *= scale; + + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = new Rgb(v.X, v.Y, v.Z); + + 
Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } + + private static void ValidateRgb(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + float r = values.Component0[i]; + float g = values.Component1[i]; + float b = values.Component2[i]; + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = new Rgb(r / 255F, g / 255F, b / 255F); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } + + private static void ValidateGrayScale(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + float y = values.Component0[i]; + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = new Rgb(y / 255F, y / 255F, y / 255F); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } + + private static void ValidateCmyk(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + var v = new Vector4(0, 0, 0, 1F); + var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F); + + float c = values.Component0[i]; + float m = values.Component1[i]; + float y = values.Component2[i]; + float k = values.Component3[i] / 255F; + + v.X = c * k; + v.Y = m * k; + v.Z = y * k; + v.W = 1F; + + v *= scale; + + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = new Rgb(v.X, v.Y, v.Z); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs index e29d8f158..cc23a45fc 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs @@ -14,22 +14,32 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg public const string DecodeProgressiveJpegOutputName = 
"DecodeProgressiveJpeg"; [Theory] - [WithFileCollection(nameof(ProgressiveTestJpegs), PixelTypes.Rgba32, false)] - [WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32, true)] - public void DecodeProgressiveJpeg(TestImageProvider provider, bool enforceDiscontiguousBuffers) + [WithFileCollection(nameof(ProgressiveTestJpegs), PixelTypes.Rgba32)] + public void DecodeProgressiveJpeg(TestImageProvider provider) where TPixel : unmanaged, IPixel + { + using Image image = provider.GetImage(JpegDecoder); + image.DebugSave(provider); + + provider.Utility.TestName = DecodeProgressiveJpegOutputName; + image.CompareToReferenceOutput( + GetImageComparer(provider), + provider, + appendPixelTypeToFileName: false); + } + + [Theory] + [WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32)] + public void DecodeProgressiveJpeg_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) { static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = + BasicSerializer.Deserialize>(providerDump); - if (!string.IsNullOrEmpty(nonContiguousBuffersStr)) - { - provider.LimitAllocatorBufferCapacity().InBytesSqrt(200); - } + provider.LimitAllocatorBufferCapacity().InBytesSqrt(200); - using Image image = provider.GetImage(JpegDecoder); + using Image image = provider.GetImage(JpegDecoder); image.DebugSave(provider, nonContiguousBuffersStr); provider.Utility.TestName = DecodeProgressiveJpegOutputName; @@ -44,8 +54,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg RemoteExecutor.Invoke( RunTest, providerDump, - enforceDiscontiguousBuffers ? 
"Disco" : string.Empty) - .Dispose(); + "Disco").Dispose(); } } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs index 78218aec9..182525233 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs @@ -129,10 +129,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Theory] [InlineData(TestImages.Jpeg.Baseline.Jpeg420Small, 0)] [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 1)] - [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 10)] + [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 15)] [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 30)] [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 1)] - [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 10)] + [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 15)] [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 30)] public async Task Decode_IsCancellable(string fileName, int cancellationDelayMs) { @@ -141,17 +141,32 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg TestEnvironment.InputImagesDirectoryFullPath, fileName); - var cts = new CancellationTokenSource(); - if (cancellationDelayMs == 0) - { - cts.Cancel(); - } - else + const int NumberOfRuns = 5; + + for (int i = 0; i < NumberOfRuns; i++) { - cts.CancelAfter(cancellationDelayMs); + var cts = new CancellationTokenSource(); + if (cancellationDelayMs == 0) + { + cts.Cancel(); + } + else + { + cts.CancelAfter(cancellationDelayMs); + } + + try + { + using var image = await Image.LoadAsync(hugeFile, cts.Token); + } + catch (TaskCanceledException) + { + // Succesfully observed a cancellation + return; + } } - await Assert.ThrowsAsync(() => Image.LoadAsync(hugeFile, cts.Token)); + throw new Exception($"No cancellation happened out of {NumberOfRuns} runs!"); } [Theory(Skip = "Identify is too fast, doesn't work reliably.")] diff --git 
a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs index 5b6adfe1a..2164975df 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs @@ -404,16 +404,15 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png [Theory] [WithFile(TestImages.Png.Splash, PixelTypes.Rgba32)] [WithFile(TestImages.Png.Bike, PixelTypes.Rgba32)] - public void PngDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void PngDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) { static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(PngDecoder); + using Image image = provider.GetImage(PngDecoder); image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr); image.CompareToOriginal(provider); } diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index b9f5f16fa..b4670cb5d 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -2,13 +2,8 @@ // Licensed under the Apache License, Version 2.0. 
// ReSharper disable InconsistentNaming -using System.Diagnostics; using System.IO; using System.Linq; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics.X86; -#endif -using Microsoft.DotNet.RemoteExecutor; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Formats.Png; using SixLabors.ImageSharp.Metadata; @@ -16,7 +11,6 @@ using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing.Processors.Quantization; using SixLabors.ImageSharp.Tests.TestUtilities; using SixLabors.ImageSharp.Tests.TestUtilities.ImageComparison; - using Xunit; namespace SixLabors.ImageSharp.Tests.Formats.Png @@ -536,16 +530,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png [Theory] [WithTestPatternImages(100, 100, PixelTypes.Rgba32)] - public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) { - static void RunTest(string providerDump) + static void RunTest(string serialized) { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); -#if SUPPORTS_RUNTIME_INTRINSICS - Assert.False(Ssse3.IsSupported); -#endif + TestImageProvider provider = + FeatureTestRunner.DeserializeForXunit>(serialized); foreach (PngInterlaceMode interlaceMode in InterlaceMode) { @@ -560,19 +550,21 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png } } - string providerDump = BasicSerializer.Serialize(provider); + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.DisableSSSE3, + provider); + } - var processStartInfo = new ProcessStartInfo(); - processStartInfo.Environment[TestEnvironment.Features.EnableSSE3] = TestEnvironment.Features.Off; + [Fact] + public void EncodeFixesInvalidOptions() + { + // https://github.com/SixLabors/ImageSharp/issues/935 + using var ms = new MemoryStream(); + var testFile = TestFile.Create(TestImages.Png.Issue935); + using Image image = testFile.CreateRgba32Image(new 
PngDecoder()); - RemoteExecutor.Invoke( - RunTest, - providerDump, - new RemoteInvokeOptions - { - StartInfo = processStartInfo - }) - .Dispose(); + image.Save(ms, new PngEncoder { ColorType = PngColorType.RgbWithAlpha }); } private static void TestPngEncoderCore( diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs index 5fb15541e..edb43aa12 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs @@ -747,16 +747,15 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga [Theory] [WithFile(Bit24BottomLeft, PixelTypes.Rgba32)] [WithFile(Bit32BottomLeft, PixelTypes.Rgba32)] - public void TgaDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void TgaDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) { static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(TgaDecoder); + using Image image = provider.GetImage(TgaDecoder); image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr); if (TestEnvironment.IsWindows) diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs index 0f76d9931..58ed31e61 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Image image, bool useExactComparer = true, float compareTolerance = 0.01f) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { string path = 
TestImageProvider.GetFilePathOrNull(provider); if (path == null) @@ -39,7 +39,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga } public static Image DecodeWithMagick(Configuration configuration, FileInfo fileInfo) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { using (var magickImage = new MagickImage(fileInfo)) { @@ -48,7 +48,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Assert.True(result.TryGetSinglePixelSpan(out Span resultPixels)); - using (IPixelCollection pixels = magickImage.GetPixelsUnsafe()) + using (IUnsafePixelCollection pixels = magickImage.GetPixelsUnsafe()) { byte[] data = pixels.ToByteArray(PixelMapping.RGBA); diff --git a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs index 27689f681..7d1662387 100644 --- a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs +++ b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs @@ -10,6 +10,21 @@ namespace SixLabors.ImageSharp.Tests.Helpers { public class ImageMathsTests { + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(2)] + [InlineData(3)] + [InlineData(4)] + [InlineData(100)] + [InlineData(123)] + [InlineData(53436353)] + public void Modulo2(int x) + { + int actual = ImageMaths.Modulo2(x); + Assert.Equal(x % 2, actual); + } + [Theory] [InlineData(0)] [InlineData(1)] diff --git a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs index c3b8e79ee..2bb43c440 100644 --- a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs +++ b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs @@ -17,6 +17,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers [InlineData(0)] [InlineData(1)] [InlineData(30)] + [InlineData(63)] public void Premultiply_VectorSpan(int length) { var rnd = new Random(42); @@ -36,6 +37,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers [InlineData(0)] [InlineData(1)] [InlineData(30)] + [InlineData(63)] public void 
UnPremultiply_VectorSpan(int length) { var rnd = new Random(42); diff --git a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj index b525bf352..4d76f6f48 100644 --- a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj +++ b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj @@ -26,6 +26,7 @@ + diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs index 6fda9dbba..9d0d09a98 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs @@ -13,34 +13,49 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats { public static class ReferenceImplementations { - public static Rgba32 MakeRgba32(byte r, byte g, byte b, byte a) + public static byte[] MakeRgba32ByteArray(byte r, byte g, byte b, byte a) { - Rgba32 d = default; - d.R = r; - d.G = g; - d.B = b; - d.A = a; - return d; + var buffer = new byte[256]; + + for (int i = 0; i < buffer.Length; i += 4) + { + buffer[i] = r; + buffer[i + 1] = g; + buffer[i + 2] = b; + buffer[i + 3] = a; + } + + return buffer; } - public static Argb32 MakeArgb32(byte r, byte g, byte b, byte a) + public static byte[] MakeArgb32ByteArray(byte r, byte g, byte b, byte a) { - Argb32 d = default; - d.R = r; - d.G = g; - d.B = b; - d.A = a; - return d; + var buffer = new byte[256]; + + for (int i = 0; i < buffer.Length; i += 4) + { + buffer[i] = a; + buffer[i + 1] = r; + buffer[i + 2] = g; + buffer[i + 3] = b; + } + + return buffer; } - public static Bgra32 MakeBgra32(byte r, byte g, byte b, byte a) + public static byte[] MakeBgra32ByteArray(byte r, byte g, byte b, byte a) { - Bgra32 d = default; - d.R = r; - d.G = g; - d.B = b; - d.A = a; - return d; + var buffer = new byte[256]; + + for (int i = 0; i < buffer.Length; i += 4) + { + buffer[i] = b; + buffer[i + 1] 
= g; + buffer[i + 2] = r; + buffer[i + 3] = a; + } + + return buffer; } internal static void To( @@ -83,8 +98,7 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats if (typeof(TDestinationPixel) == typeof(L8)) { - ref L8 l8Ref = ref MemoryMarshal.GetReference( - MemoryMarshal.Cast(destinationPixels)); + ref L8 l8Ref = ref MemoryMarshal.GetReference(MemoryMarshal.Cast(destinationPixels)); for (int i = 0; i < count; i++) { ref TSourcePixel sp = ref Unsafe.Add(ref sourceRef, i); diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs index 3de6804dc..6eed875f3 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.PixelFormats.Utils; @@ -33,30 +34,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [MemberData(nameof(RgbaData))] public void ToArgb32(byte r, byte g, byte b, byte a) { - Rgba32 s = ReferenceImplementations.MakeRgba32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromRgba32.ToArgb32(s.PackedValue); + PixelConverter.FromRgba32.ToArgb32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeArgb32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } [Theory] [MemberData(nameof(RgbaData))] public void ToBgra32(byte r, byte g, byte b, byte a) { - Rgba32 s = ReferenceImplementations.MakeRgba32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint 
actualPacked = PixelConverter.FromRgba32.ToBgra32(s.PackedValue); + PixelConverter.FromRgba32.ToBgra32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeBgra32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } } @@ -66,30 +65,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [MemberData(nameof(RgbaData))] public void ToRgba32(byte r, byte g, byte b, byte a) { - Argb32 s = ReferenceImplementations.MakeArgb32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromArgb32.ToRgba32(s.PackedValue); + PixelConverter.FromArgb32.ToRgba32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeRgba32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } [Theory] [MemberData(nameof(RgbaData))] public void ToBgra32(byte r, byte g, byte b, byte a) { - Argb32 s = ReferenceImplementations.MakeArgb32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromArgb32.ToBgra32(s.PackedValue); + PixelConverter.FromArgb32.ToBgra32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeBgra32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } } @@ -99,30 +96,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [MemberData(nameof(RgbaData))] public void ToArgb32(byte r, byte g, byte b, byte a) { - Bgra32 s = ReferenceImplementations.MakeBgra32(r, g, b, a); + 
byte[] source = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromBgra32.ToArgb32(s.PackedValue); + PixelConverter.FromBgra32.ToArgb32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeArgb32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } [Theory] [MemberData(nameof(RgbaData))] public void ToRgba32(byte r, byte g, byte b, byte a) { - Bgra32 s = ReferenceImplementations.MakeBgra32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromBgra32.ToRgba32(s.PackedValue); + PixelConverter.FromBgra32.ToRgba32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeRgba32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } } } diff --git a/tests/ImageSharp.Tests/Processing/Binarization/AdaptiveThresholdTests.cs b/tests/ImageSharp.Tests/Processing/Binarization/AdaptiveThresholdTests.cs index 8efac7593..f4f800107 100644 --- a/tests/ImageSharp.Tests/Processing/Binarization/AdaptiveThresholdTests.cs +++ b/tests/ImageSharp.Tests/Processing/Binarization/AdaptiveThresholdTests.cs @@ -100,6 +100,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Binarization [Theory] [WithFile(TestImages.Png.Bradley01, PixelTypes.Rgba32)] [WithFile(TestImages.Png.Bradley02, PixelTypes.Rgba32)] + [WithFile(TestImages.Png.Ducky, PixelTypes.Rgba32)] public void AdaptiveThreshold_Works(TestImageProvider provider) where TPixel : unmanaged, IPixel { diff --git a/tests/ImageSharp.Tests/Processing/Normalization/HistogramEqualizationTests.cs 
b/tests/ImageSharp.Tests/Processing/Normalization/HistogramEqualizationTests.cs index 1c1da6f19..4460f04fb 100644 --- a/tests/ImageSharp.Tests/Processing/Normalization/HistogramEqualizationTests.cs +++ b/tests/ImageSharp.Tests/Processing/Normalization/HistogramEqualizationTests.cs @@ -17,7 +17,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Normalization [Theory] [InlineData(256)] [InlineData(65536)] - public void HistogramEqualizationTest(int luminanceLevels) + public void GlobalHistogramEqualization_WithDifferentLumanceLevels(int luminanceLevels) { // Arrange var pixels = new byte[] @@ -45,20 +45,21 @@ namespace SixLabors.ImageSharp.Tests.Processing.Normalization var expected = new byte[] { - 0, 12, 53, 32, 146, 53, 174, 53, - 57, 32, 12, 227, 219, 202, 32, 154, - 65, 85, 93, 239, 251, 227, 65, 158, - 73, 146, 146, 247, 255, 235, 154, 130, - 97, 166, 117, 231, 243, 210, 117, 117, - 117, 190, 36, 190, 178, 93, 20, 170, - 130, 202, 73, 20, 12, 53, 85, 194, - 146, 206, 130, 117, 85, 166, 182, 215 + 0, 12, 53, 32, 146, 53, 174, 53, + 57, 32, 12, 227, 219, 202, 32, 154, + 65, 85, 93, 239, 251, 227, 65, 158, + 73, 146, 146, 247, 255, 235, 154, 130, + 97, 166, 117, 231, 243, 210, 117, 117, + 117, 190, 36, 190, 178, 93, 20, 170, + 130, 202, 73, 20, 12, 53, 85, 194, + 146, 206, 130, 117, 85, 166, 182, 215 }; // Act image.Mutate(x => x.HistogramEqualization(new HistogramEqualizationOptions { - LuminanceLevels = luminanceLevels + LuminanceLevels = luminanceLevels, + Method = HistogramEqualizationMethod.Global })); // Assert @@ -75,6 +76,24 @@ namespace SixLabors.ImageSharp.Tests.Processing.Normalization } } + [Theory] + [WithFile(TestImages.Jpeg.Baseline.HistogramEqImage, PixelTypes.Rgba32)] + public void GlobalHistogramEqualization_CompareToReferenceOutput(TestImageProvider provider) + where TPixel : unmanaged, IPixel + { + using (Image image = provider.GetImage()) + { + var options = new HistogramEqualizationOptions + { + Method = HistogramEqualizationMethod.Global, 
+ LuminanceLevels = 256, + }; + image.Mutate(x => x.HistogramEqualization(options)); + image.DebugSave(provider); + image.CompareToReferenceOutput(ValidatorComparer, provider, extension: "png"); + } + } + [Theory] [WithFile(TestImages.Jpeg.Baseline.LowContrast, PixelTypes.Rgba32)] public void Adaptive_SlidingWindow_15Tiles_WithClipping(TestImageProvider provider) diff --git a/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs b/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs index 50b8782e4..6c48cf843 100644 --- a/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs +++ b/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs @@ -138,21 +138,10 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution public void BokehBlurFilterProcessor(TestImageProvider provider, BokehBlurInfo value) where TPixel : unmanaged, IPixel { - static void RunTest(string providerDump, string infoDump) - { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); - BokehBlurInfo value = BasicSerializer.Deserialize(infoDump); - - provider.RunValidatingProcessorTest( - x => x.BokehBlur(value.Radius, value.Components, value.Gamma), - testOutputDetails: value.ToString(), - appendPixelTypeToFileName: false); - } - - RemoteExecutor - .Invoke(RunTest, BasicSerializer.Serialize(provider), BasicSerializer.Serialize(value)) - .Dispose(); + provider.RunValidatingProcessorTest( + x => x.BokehBlur(value.Radius, value.Components, value.Gamma), + testOutputDetails: value.ToString(), + appendPixelTypeToFileName: false); } [Theory] @@ -164,18 +153,9 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution public void BokehBlurFilterProcessor_WorksWithAllPixelTypes(TestImageProvider provider) where TPixel : unmanaged, IPixel { - static void RunTest(string providerDump) - { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); - 
provider.RunValidatingProcessorTest( + provider.RunValidatingProcessorTest( x => x.BokehBlur(8, 2, 3), appendSourceFileOrDescription: false); - } - - RemoteExecutor - .Invoke(RunTest, BasicSerializer.Serialize(provider)) - .Dispose(); } [Theory] @@ -183,26 +163,15 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution public void BokehBlurFilterProcessor_Bounded(TestImageProvider provider, BokehBlurInfo value) where TPixel : unmanaged, IPixel { - static void RunTest(string providerDump, string infoDump) - { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); - BokehBlurInfo value = BasicSerializer.Deserialize(infoDump); - - provider.RunValidatingProcessorTest( - x => - { - Size size = x.GetCurrentSize(); - var bounds = new Rectangle(10, 10, size.Width / 2, size.Height / 2); - x.BokehBlur(value.Radius, value.Components, value.Gamma, bounds); - }, - testOutputDetails: value.ToString(), - appendPixelTypeToFileName: false); - } - - RemoteExecutor - .Invoke(RunTest, BasicSerializer.Serialize(provider), BasicSerializer.Serialize(value)) - .Dispose(); + provider.RunValidatingProcessorTest( + x => + { + Size size = x.GetCurrentSize(); + var bounds = new Rectangle(10, 10, size.Width / 2, size.Height / 2); + x.BokehBlur(value.Radius, value.Components, value.Gamma, bounds); + }, + testOutputDetails: value.ToString(), + appendPixelTypeToFileName: false); } [Theory] diff --git a/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeTests.cs b/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeTests.cs index f40b8d11a..47d951837 100644 --- a/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeTests.cs +++ b/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeTests.cs @@ -355,6 +355,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Transforms } [Theory] + [PlatformSpecific(~TestPlatforms.OSX)] [WithFileCollection(nameof(CommonTestImages), DefaultPixelType)] public void 
ResizeFromSourceRectangle(TestImageProvider provider) where TPixel : unmanaged, IPixel @@ -437,6 +438,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Transforms } [Theory] + [PlatformSpecific(~TestPlatforms.OSX)] [WithFileCollection(nameof(CommonTestImages), DefaultPixelType)] public void ResizeWithBoxPadMode(TestImageProvider provider) where TPixel : unmanaged, IPixel @@ -547,6 +549,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Transforms } [Theory] + [PlatformSpecific(~TestPlatforms.OSX)] [WithFileCollection(nameof(CommonTestImages), DefaultPixelType)] public void ResizeWithPadMode(TestImageProvider provider) where TPixel : unmanaged, IPixel diff --git a/tests/ImageSharp.Tests/TestImages.cs b/tests/ImageSharp.Tests/TestImages.cs index d66f1a5c7..a58ba53f4 100644 --- a/tests/ImageSharp.Tests/TestImages.cs +++ b/tests/ImageSharp.Tests/TestImages.cs @@ -107,6 +107,9 @@ namespace SixLabors.ImageSharp.Tests public const string Issue1177_1 = "Png/issues/Issue_1177_1.png"; public const string Issue1177_2 = "Png/issues/Issue_1177_2.png"; + // Issue 935: https://github.com/SixLabors/ImageSharp/issues/935 + public const string Issue935 = "Png/issues/Issue_935.png"; + public static class Bad { public const string MissingDataChunk = "Png/xdtn0g01.png"; @@ -193,6 +196,7 @@ namespace SixLabors.ImageSharp.Tests public const string YcckSubsample1222 = "Jpg/baseline/ycck-subsample-1222.jpg"; public const string Iptc = "Jpg/baseline/iptc.jpg"; public const string App13WithEmptyIptc = "Jpg/baseline/iptc-psAPP13-wIPTCempty.jpg"; + public const string HistogramEqImage = "Jpg/baseline/640px-Unequalized_Hawkes_Bay_NZ.jpg"; public static readonly string[] All = { diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs new file mode 100644 index 000000000..4720ea78a --- /dev/null +++ 
b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -0,0 +1,319 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using Microsoft.DotNet.RemoteExecutor; +using Xunit.Abstractions; + +namespace SixLabors.ImageSharp.Tests.TestUtilities +{ + /// + /// Allows the testing against specific feature sets. + /// + public static class FeatureTestRunner + { + private static readonly char[] SplitChars = new[] { ',', ' ' }; + + /// + /// Allows the deserialization of parameters passed to the feature test. + /// + /// + /// This is required because does not allow + /// marshalling of fields so we cannot pass a wrapped + /// allowing automatic deserialization. + /// + /// + /// + /// The type to deserialize to. + /// The string value to deserialize. + /// The value. + public static T DeserializeForXunit(string value) + where T : IXunitSerializable + => BasicSerializer.Deserialize(value); + + /// + /// Allows the deserialization of types implementing + /// passed to the feature test. + /// + /// The string value to deserialize. + /// The value. + public static T Deserialize(string value) + where T : IConvertible + => (T)Convert.ChangeType(value, typeof(T)); + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. 
+ public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics) + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// + /// The test action to run. + /// The parameter passed will be a string representing the currently testing . + /// The intrinsics features. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics) + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + intrinsic.Key.ToString(), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(intrinsic.Key.ToString()); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. 
+ public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T serializable) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(serializable), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(BasicSerializer.Serialize(serializable)); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T serializable) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(serializable), + intrinsic.Key.ToString(), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. 
+ action(BasicSerializer.Serialize(serializable), intrinsic.Key.ToString()); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The value to pass as a parameter to the test action. + /// The intrinsics features. + public static void RunWithHwIntrinsicsFeature( + Action action, + T serializable, + HwIntrinsics intrinsics) + where T : IConvertible + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + serializable.ToString(), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(serializable.ToString()); + } + } + } + + internal static Dictionary ToFeatureKeyValueCollection(this HwIntrinsics intrinsics) + { + // Loop through and translate the given values into COMPlus equivaluents + var features = new Dictionary(); + foreach (string intrinsic in intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries)) + { + var key = (HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic); + switch (intrinsic) + { + case nameof(HwIntrinsics.DisableSIMD): + features.Add(key, "FeatureSIMD"); + break; + + case nameof(HwIntrinsics.AllowAll): + + // Not a COMPlus value. We filter in calling method. 
+ features.Add(key, nameof(HwIntrinsics.AllowAll)); + break; + + default: + features.Add(key, intrinsic.Replace("Disable", "Enable")); + break; + } + } + + return features; + } + } + + /// + /// See + /// + /// ends up impacting all SIMD support(including System.Numerics) + /// but not things like , , and . + /// + /// + [Flags] +#pragma warning disable RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute). + public enum HwIntrinsics +#pragma warning restore RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute). + { + // Use flags so we can pass multiple values without using params. + // Don't base on 0 or use inverse for All as that doesn't translate to string values. + DisableSIMD = 1 << 0, + DisableHWIntrinsic = 1 << 1, + DisableSSE = 1 << 2, + DisableSSE2 = 1 << 3, + DisableAES = 1 << 4, + DisablePCLMULQDQ = 1 << 5, + DisableSSE3 = 1 << 6, + DisableSSSE3 = 1 << 7, + DisableSSE41 = 1 << 8, + DisableSSE42 = 1 << 9, + DisablePOPCNT = 1 << 10, + DisableAVX = 1 << 11, + DisableFMA = 1 << 12, + DisableAVX2 = 1 << 13, + DisableBMI1 = 1 << 14, + DisableBMI2 = 1 << 15, + DisableLZCNT = 1 << 16, + AllowAll = 1 << 17 + } +} diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index f3af4dabd..d411a6fb7 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -8,6 +8,7 @@ using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; using ImageMagick; +using ImageMagick.Formats.Bmp; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Metadata; @@ -17,10 +18,22 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs { public class MagickReferenceDecoder : IImageDecoder { + private readonly bool 
validate; + + public MagickReferenceDecoder() + : this(true) + { + } + + public MagickReferenceDecoder(bool validate) + { + this.validate = validate; + } + public static MagickReferenceDecoder Instance { get; } = new MagickReferenceDecoder(); private static void FromRgba32Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { foreach (Memory m in destinationGroup) { @@ -35,7 +48,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs } private static void FromRgba64Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { foreach (Memory m in destinationGroup) { @@ -50,44 +63,49 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs } public Task> DecodeAsync(Configuration configuration, Stream stream, CancellationToken cancellationToken) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel => Task.FromResult(this.Decode(configuration, stream)); public Image Decode(Configuration configuration, Stream stream) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { - using var magickImageCollection = new MagickImageCollection(stream); + var bmpReadDefines = new BmpReadDefines + { + IgnoreFileSize = !this.validate + }; + + var settings = new MagickReadSettings(); + settings.SetDefines(bmpReadDefines); + + using var magickImageCollection = new MagickImageCollection(stream, settings); var framesList = new List>(); - foreach (IMagickImage magicFrame in magickImageCollection) + foreach (IMagickImage magicFrame in magickImageCollection) { var frame = new ImageFrame(configuration, magicFrame.Width, magicFrame.Height); framesList.Add(frame); MemoryGroup framePixels = frame.PixelBuffer.FastMemoryGroup; - using (IPixelCollection 
pixels = magicFrame.GetPixelsUnsafe()) + + using IUnsafePixelCollection pixels = magicFrame.GetPixelsUnsafe(); + if (magicFrame.Depth == 8) + { + byte[] data = pixels.ToByteArray(PixelMapping.RGBA); + + FromRgba32Bytes(configuration, data, framePixels); + } + else if (magicFrame.Depth == 16) + { + ushort[] data = pixels.ToShortArray(PixelMapping.RGBA); + Span bytes = MemoryMarshal.Cast(data.AsSpan()); + FromRgba64Bytes(configuration, bytes, framePixels); + } + else { - if (magicFrame.Depth == 8) - { - byte[] data = pixels.ToByteArray(PixelMapping.RGBA); - - FromRgba32Bytes(configuration, data, framePixels); - } - else if (magicFrame.Depth == 16) - { - ushort[] data = pixels.ToShortArray(PixelMapping.RGBA); - Span bytes = MemoryMarshal.Cast(data.AsSpan()); - FromRgba64Bytes(configuration, bytes, framePixels); - } - else - { - throw new InvalidOperationException(); - } + throw new InvalidOperationException(); } } - var result = new Image(configuration, new ImageMetadata(), framesList); - - return result; + return new Image(configuration, new ImageMetadata(), framesList); } public Image Decode(Configuration configuration, Stream stream) => this.Decode(configuration, stream); diff --git a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs deleted file mode 100644 index 3568c1e5d..000000000 --- a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. 
- -namespace SixLabors.ImageSharp.Tests -{ - public static partial class TestEnvironment - { - internal static class Features - { - public const string On = "1"; - public const string Off = "0"; - - // See https://github.com/SixLabors/ImageSharp/pull/1229#discussion_r440477861 - // * EnableHWIntrinsic - // * EnableSSE - // * EnableSSE2 - // * EnableAES - // * EnablePCLMULQDQ - // * EnableSSE3 - // * EnableSSSE3 - // * EnableSSE41 - // * EnableSSE42 - // * EnablePOPCNT - // * EnableAVX - // * EnableFMA - // * EnableAVX2 - // * EnableBMI1 - // * EnableBMI2 - // * EnableLZCNT - // - // `FeatureSIMD` ends up impacting all SIMD support(including `System.Numerics`) but not things - // like `LZCNT`, `BMI1`, or `BMI2` - // `EnableSSE3_4` is a legacy switch that exists for compat and is basically the same as `EnableSSE3` - public const string EnableAES = "COMPlus_EnableAES"; - public const string EnableAVX = "COMPlus_EnableAVX"; - public const string EnableAVX2 = "COMPlus_EnableAVX2"; - public const string EnableBMI1 = "COMPlus_EnableBMI1"; - public const string EnableBMI2 = "COMPlus_EnableBMI2"; - public const string EnableFMA = "COMPlus_EnableFMA"; - public const string EnableHWIntrinsic = "COMPlus_EnableHWIntrinsic"; - public const string EnableLZCNT = "COMPlus_EnableLZCNT"; - public const string EnablePCLMULQDQ = "COMPlus_EnablePCLMULQDQ"; - public const string EnablePOPCNT = "COMPlus_EnablePOPCNT"; - public const string EnableSSE = "COMPlus_EnableSSE"; - public const string EnableSSE2 = "COMPlus_EnableSSE2"; - public const string EnableSSE3 = "COMPlus_EnableSSE3"; - public const string EnableSSE3_4 = "COMPlus_EnableSSE3_4"; - public const string EnableSSE41 = "COMPlus_EnableSSE41"; - public const string EnableSSE42 = "COMPlus_EnableSSE42"; - public const string EnableSSSE3 = "COMPlus_EnableSSSE3"; - public const string FeatureSIMD = "COMPlus_FeatureSIMD"; - } - } -} diff --git a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs 
b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs index 1375b5763..48728faf0 100644 --- a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs +++ b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs @@ -170,7 +170,10 @@ namespace SixLabors.ImageSharp.Tests } string testProjectConfigPath = TestAssemblyFile.FullName + ".config"; - File.Copy(testProjectConfigPath, remoteExecutorConfigPath); + if (File.Exists(testProjectConfigPath)) + { + File.Copy(testProjectConfigPath, remoteExecutorConfigPath); + } if (Is64BitProcess) { diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs new file mode 100644 index 000000000..4cbbefe68 --- /dev/null +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -0,0 +1,296 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Numerics; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif +using Xunit; +using Xunit.Abstractions; + +namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests +{ + public class FeatureTestRunnerTests + { + public static TheoryData Intrinsics => + new TheoryData + { + { HwIntrinsics.DisableAES | HwIntrinsics.AllowAll, new string[] { "EnableAES", "AllowAll" } }, + { HwIntrinsics.DisableSIMD | HwIntrinsics.DisableHWIntrinsic, new string[] { "FeatureSIMD", "EnableHWIntrinsic" } }, + { HwIntrinsics.DisableSSE42 | HwIntrinsics.DisableAVX, new string[] { "EnableSSE42", "EnableAVX" } } + }; + + [Theory] + [MemberData(nameof(Intrinsics))] + public void ToFeatureCollectionReturnsExpectedResult(HwIntrinsics expectedItrinsics, string[] expectedValues) + { + Dictionary features = expectedItrinsics.ToFeatureKeyValueCollection(); + HwIntrinsics[] keys = features.Keys.ToArray(); + + HwIntrinsics actualIntrinsics = keys[0]; + for (int i = 1; i < 
keys.Length; i++) + { + actualIntrinsics |= keys[i]; + } + + Assert.Equal(expectedItrinsics, actualIntrinsics); + + IEnumerable actualValues = features.Select(x => x.Value); + Assert.Equal(expectedValues, actualValues); + } + + [Fact] + public void AllowsAllHwIntrinsicFeatures() + { + if (!Vector.IsHardwareAccelerated) + { + return; + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + () => Assert.True(Vector.IsHardwareAccelerated), + HwIntrinsics.AllowAll); + } + + [Fact] + public void CanLimitHwIntrinsicSIMDFeatures() + { + FeatureTestRunner.RunWithHwIntrinsicsFeature( + () => Assert.False(Vector.IsHardwareAccelerated), + HwIntrinsics.DisableSIMD); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void CanLimitHwIntrinsicBaseFeatures() + { + static void AssertDisabled() + { + Assert.False(Sse.IsSupported); + Assert.False(Sse2.IsSupported); + Assert.False(Aes.IsSupported); + Assert.False(Pclmulqdq.IsSupported); + Assert.False(Sse3.IsSupported); + Assert.False(Ssse3.IsSupported); + Assert.False(Sse41.IsSupported); + Assert.False(Sse42.IsSupported); + Assert.False(Popcnt.IsSupported); + Assert.False(Avx.IsSupported); + Assert.False(Fma.IsSupported); + Assert.False(Avx2.IsSupported); + Assert.False(Bmi1.IsSupported); + Assert.False(Bmi2.IsSupported); + Assert.False(Lzcnt.IsSupported); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + AssertDisabled, + HwIntrinsics.DisableHWIntrinsic); + } +#endif + + [Fact] + public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() + { + static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) + { + Assert.NotNull(intrinsic); + + switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated); + break; +#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Sse.IsSupported); + Assert.False(Sse2.IsSupported); + Assert.False(Aes.IsSupported); + Assert.False(Pclmulqdq.IsSupported); + 
Assert.False(Sse3.IsSupported); + Assert.False(Ssse3.IsSupported); + Assert.False(Sse41.IsSupported); + Assert.False(Sse42.IsSupported); + Assert.False(Popcnt.IsSupported); + Assert.False(Avx.IsSupported); + Assert.False(Fma.IsSupported); + Assert.False(Avx2.IsSupported); + Assert.False(Bmi1.IsSupported); + Assert.False(Bmi2.IsSupported); + Assert.False(Lzcnt.IsSupported); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } + + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic); + } + } + + [Fact] + public void CanLimitHwIntrinsicFeaturesWithSerializableParam() + { + static void AssertHwIntrinsicsFeatureDisabled(string serializable) + { + Assert.NotNull(serializable); + 
Assert.NotNull(FeatureTestRunner.DeserializeForXunit(serializable)); + +#if SUPPORTS_RUNTIME_INTRINSICS + Assert.False(Sse.IsSupported); +#endif + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + AssertHwIntrinsicsFeatureDisabled, + HwIntrinsics.DisableSSE, + new FakeSerializable()); + } + + [Fact] + public void CanLimitHwIntrinsicFeaturesWithSerializableAndIntrinsicsParams() + { + static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) + { + Assert.NotNull(serializable); + Assert.NotNull(FeatureTestRunner.DeserializeForXunit(serializable)); + + switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)); + break; +#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Sse.IsSupported); + Assert.False(Sse2.IsSupported); + Assert.False(Aes.IsSupported); + Assert.False(Pclmulqdq.IsSupported); + Assert.False(Sse3.IsSupported); + Assert.False(Ssse3.IsSupported); + Assert.False(Sse41.IsSupported); + Assert.False(Sse42.IsSupported); + Assert.False(Popcnt.IsSupported); + Assert.False(Avx.IsSupported); + Assert.False(Fma.IsSupported); + Assert.False(Avx2.IsSupported); + Assert.False(Bmi1.IsSupported); + Assert.False(Bmi2.IsSupported); + Assert.False(Lzcnt.IsSupported); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + 
break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } + + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic, new FakeSerializable()); + } + } + + public class FakeSerializable : IXunitSerializable + { + public void Deserialize(IXunitSerializationInfo info) + { + } + + public void Serialize(IXunitSerializationInfo info) + { + } + } + } +} diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs index e72d953ac..e3418220b 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs @@ -67,7 +67,7 @@ namespace SixLabors.ImageSharp.Tests [InlineData("lol/Baz.gif", typeof(GifEncoder))] public void GetReferenceEncoder_ReturnsCorrectEncoders_Windows(string fileName, Type expectedEncoderType) { - if (TestEnvironment.IsLinux) + if (!TestEnvironment.IsWindows) { return; } @@ -83,7 +83,7 @@ namespace SixLabors.ImageSharp.Tests [InlineData("lol/Baz.gif", typeof(GifDecoder))] public void GetReferenceDecoder_ReturnsCorrectDecoders_Windows(string fileName, Type expectedDecoderType) { - if (TestEnvironment.IsLinux) + if (!TestEnvironment.IsWindows) { return; } diff --git a/tests/Images/External b/tests/Images/External index 6a0030806..8b43d14d2 160000 --- 
a/tests/Images/External +++ b/tests/Images/External @@ -1 +1 @@ -Subproject commit 6a003080674d1fedc66292c13ce5a357b2a33083 +Subproject commit 8b43d14d21ce9b436af3d12a70d38402cdba176b diff --git a/tests/Images/Input/Jpg/baseline/640px-Unequalized_Hawkes_Bay_NZ.jpg b/tests/Images/Input/Jpg/baseline/640px-Unequalized_Hawkes_Bay_NZ.jpg new file mode 100644 index 000000000..bb89de589 --- /dev/null +++ b/tests/Images/Input/Jpg/baseline/640px-Unequalized_Hawkes_Bay_NZ.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1fafc61231325c42d94fe163486a6c5144fb6211ccdceb902d5cb4ddebda9e1 +size 32428 diff --git a/tests/Images/Input/Png/issues/Issue_935.png b/tests/Images/Input/Png/issues/Issue_935.png new file mode 100644 index 000000000..9f9e84dc3 --- /dev/null +++ b/tests/Images/Input/Png/issues/Issue_935.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a9c5cdacc9bedf481c883828de5bfb7902e2bec038fff08830171cf7075e4f9 +size 870 diff --git a/tests/coverlet.runsettings b/tests/coverlet.runsettings index ee408a5f0..cffce3540 100644 --- a/tests/coverlet.runsettings +++ b/tests/coverlet.runsettings @@ -1,5 +1,9 @@ + + + category!=failing +