Browse Source

Merge branch 'master' into tiff-format

pull/1570/head
James Jackson-South 5 years ago
parent
commit
28a88eb683
  1. 6
      .github/workflows/build-and-test.yml
  2. 7
      .runsettings
  3. 2
      Directory.Build.props
  4. 10
      Directory.Build.targets
  5. 1
      ImageSharp.sln
  6. 25
      src/Directory.Build.targets
  7. 6
      src/ImageSharp/Common/Helpers/ImageMaths.cs
  8. 193
      src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs
  9. 103
      src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs
  10. 53
      src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs
  11. 101
      src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs
  12. 103
      src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs
  13. 795
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  14. 275
      src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
  15. 26
      src/ImageSharp/Common/Helpers/SimdUtils.cs
  16. 79
      src/ImageSharp/Common/Helpers/Vector4Utilities.cs
  17. 20
      src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs
  18. 86
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs
  19. 38
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt
  20. 359
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
  21. 18
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs
  22. 18
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs
  23. 81
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs
  24. 16
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs
  25. 71
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs
  26. 63
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs
  27. 13
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs
  28. 72
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs
  29. 14
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs
  30. 67
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs
  31. 101
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs
  32. 4
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs
  33. 53
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs
  34. 41
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs
  35. 110
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs
  36. 25
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs
  37. 91
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs
  38. 18
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs
  39. 46
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs
  40. 161
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
  41. 4
      src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
  42. 12
      src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
  43. 22
      src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs
  44. 10
      src/ImageSharp/Image{TPixel}.cs
  45. 134
      src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs
  46. 140
      src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs
  47. 134
      src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs
  48. 129
      src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs
  49. 134
      src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs
  50. 38
      src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude
  51. 222
      src/ImageSharp/PixelFormats/Utils/PixelConverter.cs
  52. 3
      src/ImageSharp/Processing/Processors/Binarization/AdaptiveThresholdProcessor{TPixel}.cs
  53. 62
      src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs
  54. 36
      src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs
  55. 19
      tests/Directory.Build.targets
  56. 21
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs
  57. 37
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs
  58. 21
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs
  59. 37
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs
  60. 41
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs
  61. 64
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs
  62. 33
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs
  63. 41
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs
  64. 59
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs
  65. 41
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs
  66. 14
      tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs
  67. 55
      tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs
  68. 87
      tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs
  69. 68
      tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs
  70. 64
      tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs
  71. 95
      tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs
  72. 67
      tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs
  73. 68
      tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs
  74. 65
      tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs
  75. 13
      tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs
  76. 68
      tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs
  77. 84
      tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs
  78. 2
      tests/ImageSharp.Benchmarks/Config.cs
  79. 56
      tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs
  80. 3
      tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj
  81. 53
      tests/ImageSharp.Benchmarks/Processing/HistogramEqualization.cs
  82. 399
      tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs
  83. 38
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
  84. 36
      tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs
  85. 22
      tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs
  86. 11
      tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs
  87. 186
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
  88. 490
      tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs
  89. 33
      tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs
  90. 35
      tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs
  91. 7
      tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs
  92. 42
      tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs
  93. 7
      tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs
  94. 6
      tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs
  95. 15
      tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs
  96. 2
      tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs
  97. 1
      tests/ImageSharp.Tests/ImageSharp.Tests.csproj
  98. 60
      tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs
  99. 67
      tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs
  100. 1
      tests/ImageSharp.Tests/Processing/Binarization/AdaptiveThresholdTests.cs

6
.github/workflows/build-and-test.yml

@ -15,13 +15,17 @@ jobs:
matrix:
options:
- os: ubuntu-latest
framework: netcoreapp3.1
runtime: -x64
codecov: true
- os: macos-latest
framework: netcoreapp3.1
runtime: -x64
codecov: false
- os: windows-latest
framework: netcoreapp3.1
runtime: -x64
codecov: true
codecov: false
- os: windows-latest
framework: netcoreapp2.1
runtime: -x64

7
.runsettings

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="utf-8" ?>
<RunSettings>
<RunConfiguration>
<!--Used in conjunction with ActiveIssueAttribute to skip tests with known issues-->
<TestCaseFilter>category!=failing</TestCaseFilter>
</RunConfiguration>
</RunSettings>

2
Directory.Build.props

@ -15,6 +15,7 @@
<BaseArtifactsPath>$(MSBuildThisFileDirectory)artifacts/</BaseArtifactsPath>
<BaseArtifactsPathSuffix>$(SixLaborsProjectCategory)/$(MSBuildProjectName)</BaseArtifactsPathSuffix>
<RepositoryUrl Condition="'$(RepositoryUrl)' == ''">https://github.com/SixLabors/ImageSharp/</RepositoryUrl>
<RunSettingsFilePath>$(MSBuildThisFileDirectory)/.runsettings</RunSettingsFilePath>
</PropertyGroup>
<!-- Default settings that explicitly differ from the Sdk.props defaults -->
@ -120,6 +121,7 @@
https://api.nuget.org/v3/index.json;
<!-- Contains RemoteExecutor. Taken from: https://github.com/dotnet/runtime/blob/master/NuGet.config -->
https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-eng/nuget/v3/index.json;
https://www.myget.org/F/coverlet-dev/api/v3/index.json;
</RestoreSources>
<SignAssembly>true</SignAssembly>
<AssemblyOriginatorKeyFile>$(MSBuildThisFileDirectory)shared-infrastructure/SixLabors.snk</AssemblyOriginatorKeyFile>

10
Directory.Build.targets

@ -18,22 +18,18 @@
<!-- Package versions for package references across all projects -->
<ItemGroup>
<!--Global Dependencies-->
<PackageReference Update="Microsoft.Net.Compilers.Toolset" PrivateAssets="All" Version="3.3.1" />
<PackageReference Update="Microsoft.Net.Compilers.Toolset" PrivateAssets="All" Version="3.7.0" />
<PackageReference Update="Microsoft.NETFramework.ReferenceAssemblies" PrivateAssets="All" Version="1.0.0" />
<PackageReference Update="StyleCop.Analyzers" PrivateAssets="All" Version="1.1.118" />
<!--Src Dependencies-->
<PackageReference Update="Microsoft.SourceLink.GitHub" Version="1.0.0" PrivateAssets="All"/>
<PackageReference Update="MinVer" PrivateAssets="All" Version="2.3.0" />
<PackageReference Update="MinVer" PrivateAssets="All" Version="2.3.1" />
<PackageReference Update="System.Buffers" Version="4.5.1" />
<PackageReference Update="System.IO.Compression" Version="4.3.0" />
<PackageReference Update="System.IO.UnmanagedMemoryStream" Version="4.3.0" />
<PackageReference Update="System.Numerics.Vectors" Version="4.5.0" />
<!--
Do no update System.Memory as it currently breaks the CI build
with FileNotFoundException for SixLabors.ImageSharp.Tests.dll.config
-->
<PackageReference Update="System.Memory" Version="4.5.3" />
<PackageReference Update="System.Memory" Version="4.5.4" />
<PackageReference Update="System.Runtime.CompilerServices.Unsafe" Version="4.7.1" />
<PackageReference Update="System.Threading.Tasks.Parallel" Version="4.3.0" />
<PackageReference Update="System.ValueTuple" Version="4.5.0" />

1
ImageSharp.sln

@ -8,6 +8,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
.gitattributes = .gitattributes
.gitignore = .gitignore
.gitmodules = .gitmodules
.runsettings = .runsettings
ci-build.ps1 = ci-build.ps1
ci-pack.ps1 = ci-pack.ps1
ci-test.ps1 = ci-test.ps1

25
src/Directory.Build.targets

@ -21,16 +21,25 @@
</PropertyGroup>
<!-- Workaround for running Coverlet with Determenistic builds -->
<!-- https://github.com/coverlet-coverage/coverlet/blob/master/Documentation/DeterministicBuild.md -->
<PropertyGroup>
<TargetFrameworkMonikerAssemblyAttributesPath>$([System.IO.Path]::Combine('$(IntermediateOutputPath)','$(TargetFrameworkMoniker).AssemblyAttributes$(DefaultLanguageSourceExtension)'))</TargetFrameworkMonikerAssemblyAttributesPath>
</PropertyGroup>
<ItemGroup>
<EmbeddedFiles Include="$(GeneratedAssemblyInfoFile)"/>
</ItemGroup>
<ItemGroup>
<SourceRoot Include="$(NuGetPackageRoot)" />
</ItemGroup>
<Target Name="CoverletGetPathMap"
DependsOnTargets="InitializeSourceRootMappedPaths"
Returns="@(_LocalTopLevelSourceRoot)"
Condition="'$(DeterministicSourcePaths)' == 'true'">
DependsOnTargets="InitializeSourceRootMappedPaths"
Returns="@(_LocalTopLevelSourceRoot)"
Condition="'$(DeterministicSourcePaths)' == 'true'">
<ItemGroup>
<_LocalTopLevelSourceRoot Include="@(SourceRoot)" Condition="'%(SourceRoot.NestedRoot)' == ''"/>
</ItemGroup>
</Target>
<ItemDefinitionGroup>
<InternalsVisibleTo>
<Visible>false</Visible>
@ -62,7 +71,7 @@
<!-- Empty target so that `dotnet test` will work on the solution -->
<!-- https://github.com/Microsoft/vstest/issues/411 -->
<Target Name="VSTest" Condition="'$(IsTestProject)' == 'true'"/>
<ItemGroup>
<!--Shared config files that have to exist at root level.-->
<ConfigFilesToCopy Include="..\..\shared-infrastructure\.editorconfig;..\..\shared-infrastructure\.gitattributes" />
@ -74,7 +83,7 @@
SkipUnchangedFiles = "true"
DestinationFolder="..\..\" />
</Target>
<!-- Allows regenerating T4-generated files at build time using MsBuild -->
<!-- Enable on Windows OS to build all T4 templates. TODO: XPlat
<Import Project="$(MSBuildExtensionsPath)\Microsoft\VisualStudio\v$(VisualStudioVersion)\TextTemplating\Microsoft.TextTemplating.targets" />
@ -82,5 +91,5 @@
<TransformOnBuild>true</TransformOnBuild>
</PropertyGroup>
-->
</Project>

6
src/ImageSharp/Common/Helpers/ImageMaths.cs

@ -132,6 +132,12 @@ namespace SixLabors.ImageSharp
return (a / GreatestCommonDivisor(a, b)) * b;
}
/// <summary>
/// Calculates <paramref name="x"/> % 2
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static int Modulo2(int x) => x & 1;
/// <summary>
/// Calculates <paramref name="x"/> % 4
/// </summary>

193
src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs

@ -0,0 +1,193 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
// and ROTR (Rotate Right) emitting efficient CPU instructions:
// https://github.com/dotnet/coreclr/pull/1830
namespace SixLabors.ImageSharp
{
/// <summary>
/// Defines the contract for methods that allow the shuffling of pixel components.
/// Used for shuffling on platforms that do not support Hardware Intrinsics.
/// </summary>
internal interface IComponentShuffle
{
/// <summary>
/// Gets the shuffle control.
/// </summary>
byte Control { get; }
/// <summary>
/// Shuffle 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest);
}
/// <inheritdoc/>
internal interface IShuffle4 : IComponentShuffle
{
}
internal readonly struct DefaultShuffle4 : IShuffle4
{
private readonly byte p3;
private readonly byte p2;
private readonly byte p1;
private readonly byte p0;
public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0)
{
DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));
this.p3 = p3;
this.p2 = p2;
this.p1 = p1;
this.p0 = p0;
this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
}
public byte Control { get; }
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
int p3 = this.p3;
int p2 = this.p2;
int p1 = this.p1;
int p0 = this.p0;
for (int i = 0; i < source.Length; i += 4)
{
Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i);
Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i);
}
}
}
internal readonly struct WXYZShuffle4 : IShuffle4
{
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);
}
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
// packed = [W Z Y X]
// ROTL(8, packed) = [Z Y X W]
Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24);
}
}
}
internal readonly struct WZYXShuffle4 : IShuffle4
{
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);
}
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
// packed = [W Z Y X]
// REVERSE(packedArgb) = [X Y Z W]
Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed);
}
}
}
internal readonly struct YZWXShuffle4 : IShuffle4
{
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);
}
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
// packed = [W Z Y X]
// ROTR(8, packedArgb) = [Y Z W X]
Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24);
}
}
}
internal readonly struct ZYXWShuffle4 : IShuffle4
{
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);
}
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
// packed = [W Z Y X]
// tmp1 = [W 0 Y 0]
// tmp2 = [0 Z 0 X]
// tmp3=ROTL(16, tmp2) = [0 X 0 Z]
// tmp1 + tmp3 = [W X Y Z]
uint tmp1 = packed & 0xFF00FF00;
uint tmp2 = packed & 0x00FF00FF;
uint tmp3 = (tmp2 << 16) | (tmp2 >> 16);
Unsafe.Add(ref dBase, i) = tmp1 + tmp3;
}
}
}
}

103
src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs

@ -0,0 +1,103 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
/// <inheritdoc/>
internal interface IPad3Shuffle4 : IComponentShuffle
{
}
internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4
{
private readonly byte p3;
private readonly byte p2;
private readonly byte p1;
private readonly byte p0;
public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0)
{
DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));
this.p3 = p3;
this.p2 = p2;
this.p1 = p1;
this.p0 = p0;
this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
}
public byte Control { get; }
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
int p3 = this.p3;
int p2 = this.p2;
int p1 = this.p1;
int p0 = this.p0;
Span<byte> temp = stackalloc byte[4];
ref byte t = ref MemoryMarshal.GetReference(temp);
ref uint tu = ref Unsafe.As<byte, uint>(ref t);
for (int i = 0, j = 0; i < source.Length; i += 3, j += 4)
{
ref var s = ref Unsafe.Add(ref sBase, i);
tu = Unsafe.As<byte, uint>(ref s) | 0xFF000000;
Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0);
Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref t, p1);
Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref t, p2);
Unsafe.Add(ref dBase, j + 3) = Unsafe.Add(ref t, p3);
}
}
}
internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4
{
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0);
}
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
ref byte sEnd = ref Unsafe.Add(ref sBase, source.Length);
ref byte sLoopEnd = ref Unsafe.Subtract(ref sEnd, 4);
while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd))
{
Unsafe.As<byte, uint>(ref dBase) = Unsafe.As<byte, uint>(ref sBase) | 0xFF000000;
sBase = ref Unsafe.Add(ref sBase, 3);
dBase = ref Unsafe.Add(ref dBase, 4);
}
while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd))
{
Unsafe.Add(ref dBase, 0) = Unsafe.Add(ref sBase, 0);
Unsafe.Add(ref dBase, 1) = Unsafe.Add(ref sBase, 1);
Unsafe.Add(ref dBase, 2) = Unsafe.Add(ref sBase, 2);
Unsafe.Add(ref dBase, 3) = byte.MaxValue;
sBase = ref Unsafe.Add(ref sBase, 3);
dBase = ref Unsafe.Add(ref dBase, 4);
}
}
}
}

53
src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs

@ -0,0 +1,53 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
/// <inheritdoc/>
internal interface IShuffle3 : IComponentShuffle
{
}
internal readonly struct DefaultShuffle3 : IShuffle3
{
private readonly byte p2;
private readonly byte p1;
private readonly byte p0;
public DefaultShuffle3(byte p2, byte p1, byte p0)
{
DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 2, nameof(p2));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 2, nameof(p1));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 2, nameof(p0));
this.p2 = p2;
this.p1 = p1;
this.p0 = p0;
this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0);
}
public byte Control { get; }
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
int p2 = this.p2;
int p1 = this.p1;
int p0 = this.p0;
for (int i = 0; i < source.Length; i += 3)
{
Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i);
Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
}
}
}
}

101
src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs

@ -0,0 +1,101 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
/// <inheritdoc/>
internal interface IShuffle4Slice3 : IComponentShuffle
{
}
internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3
{
private readonly byte p2;
private readonly byte p1;
private readonly byte p0;
public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0)
{
DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));
this.p2 = p2;
this.p1 = p1;
this.p0 = p0;
this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
}
public byte Control { get; }
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
int p2 = this.p2;
int p1 = this.p1;
int p0 = this.p0;
for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4)
{
Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + j);
Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + j);
Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + j);
}
}
}
internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3
{
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0);
}
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref Byte3 dBase = ref Unsafe.As<byte, Byte3>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;
int m = ImageMaths.Modulo4(n);
int u = n - m;
ref uint sLoopEnd = ref Unsafe.Add(ref sBase, u);
ref uint sEnd = ref Unsafe.Add(ref sBase, n);
while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd))
{
Unsafe.Add(ref dBase, 0) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref sBase, 0));
Unsafe.Add(ref dBase, 1) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref sBase, 1));
Unsafe.Add(ref dBase, 2) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref sBase, 2));
Unsafe.Add(ref dBase, 3) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref sBase, 3));
sBase = ref Unsafe.Add(ref sBase, 4);
dBase = ref Unsafe.Add(ref dBase, 4);
}
while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd))
{
Unsafe.Add(ref dBase, 0) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref sBase, 0));
sBase = ref Unsafe.Add(ref sBase, 1);
dBase = ref Unsafe.Add(ref dBase, 1);
}
}
}
[StructLayout(LayoutKind.Explicit, Size = 3)]
internal readonly struct Byte3
{
}
}

103
src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs

@ -1,103 +0,0 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
#if SUPPORTS_RUNTIME_INTRINSICS
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace SixLabors.ImageSharp
{
internal static partial class SimdUtils
{
public static class Avx2Intrinsics
{
private static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 };
/// <summary>
/// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturateReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
if (Avx2.IsSupported)
{
int remainder = ImageMaths.ModuloP2(source.Length, Vector<byte>.Count);
int adjustedCount = source.Length - remainder;
if (adjustedCount > 0)
{
NormalizedFloatToByteSaturate(
source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount));
source = source.Slice(adjustedCount);
dest = dest.Slice(adjustedCount);
}
}
}
/// <summary>
/// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
/// </summary>
/// <remarks>
/// Implementation is based on MagicScaler code:
/// https://github.com/saucecontrol/PhotoSauce/blob/a9bd6e5162d2160419f0cf743fd4f536c079170b/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L453-L477
/// </remarks>
internal static void NormalizedFloatToByteSaturate(
ReadOnlySpan<float> source,
Span<byte> dest)
{
VerifySpanInput(source, dest, Vector256<byte>.Count);
int n = dest.Length / Vector256<byte>.Count;
ref Vector256<float> sourceBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
ref Vector256<byte> destBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
var maxBytes = Vector256.Create(255f);
ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32);
Vector256<int> mask = Unsafe.As<byte, Vector256<int>>(ref maskBase);
for (int i = 0; i < n; i++)
{
ref Vector256<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
Vector256<float> f0 = s;
Vector256<float> f1 = Unsafe.Add(ref s, 1);
Vector256<float> f2 = Unsafe.Add(ref s, 2);
Vector256<float> f3 = Unsafe.Add(ref s, 3);
Vector256<int> w0 = ConvertToInt32(f0, maxBytes);
Vector256<int> w1 = ConvertToInt32(f1, maxBytes);
Vector256<int> w2 = ConvertToInt32(f2, maxBytes);
Vector256<int> w3 = ConvertToInt32(f3, maxBytes);
Vector256<short> u0 = Avx2.PackSignedSaturate(w0, w1);
Vector256<short> u1 = Avx2.PackSignedSaturate(w2, w3);
Vector256<byte> b = Avx2.PackUnsignedSaturate(u0, u1);
b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte();
Unsafe.Add(ref destBase, i) = b;
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector256<int> ConvertToInt32(Vector256<float> vf, Vector256<float> scale)
{
vf = Avx.Multiply(vf, scale);
return Avx.ConvertToVector256Int32(vf);
}
}
}
}
#endif

795
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -0,0 +1,795 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
#if SUPPORTS_RUNTIME_INTRINSICS
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace SixLabors.ImageSharp
{
internal static partial class SimdUtils
{
public static class HwIntrinsics
{
public static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 };
public static ReadOnlySpan<byte> PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 };
private static ReadOnlySpan<byte> ShuffleMaskPad4Nx16 => new byte[] { 0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80 };
private static ReadOnlySpan<byte> ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 };
/// <summary>
/// Shuffle single-precision (32-bit) floating-point elements in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of floats.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Reduce(
ref ReadOnlySpan<float> source,
ref Span<float> dest,
byte control)
{
if (Avx.IsSupported || Sse.IsSupported)
{
int remainder = Avx.IsSupported
? ImageMaths.ModuloP2(source.Length, Vector256<float>.Count)
: ImageMaths.ModuloP2(source.Length, Vector128<float>.Count);
int adjustedCount = source.Length - remainder;
if (adjustedCount > 0)
{
Shuffle4(
source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount),
control);
source = source.Slice(adjustedCount);
dest = dest.Slice(adjustedCount);
}
}
}
/// <summary>
/// Shuffle 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Reduce(
ref ReadOnlySpan<byte> source,
ref Span<byte> dest,
byte control)
{
if (Avx2.IsSupported || Ssse3.IsSupported)
{
int remainder = Avx2.IsSupported
? ImageMaths.ModuloP2(source.Length, Vector256<byte>.Count)
: ImageMaths.ModuloP2(source.Length, Vector128<byte>.Count);
int adjustedCount = source.Length - remainder;
if (adjustedCount > 0)
{
Shuffle4(
source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount),
control);
source = source.Slice(adjustedCount);
dest = dest.Slice(adjustedCount);
}
}
}
/// <summary>
/// Shuffles 8-bit integer triplets within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle3Reduce(
ref ReadOnlySpan<byte> source,
ref Span<byte> dest,
byte control)
{
if (Ssse3.IsSupported)
{
int remainder = source.Length % (Vector128<byte>.Count * 3);
int adjustedCount = source.Length - remainder;
if (adjustedCount > 0)
{
Shuffle3(
source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount),
control);
source = source.Slice(adjustedCount);
dest = dest.Slice(adjustedCount);
}
}
}
/// <summary>
/// Pads then shuffles 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Pad3Shuffle4Reduce(
ref ReadOnlySpan<byte> source,
ref Span<byte> dest,
byte control)
{
if (Ssse3.IsSupported)
{
int remainder = source.Length % (Vector128<byte>.Count * 3);
int sourceCount = source.Length - remainder;
int destCount = sourceCount * 4 / 3;
if (sourceCount > 0)
{
Pad3Shuffle4(
source.Slice(0, sourceCount),
dest.Slice(0, destCount),
control);
source = source.Slice(sourceCount);
dest = dest.Slice(destCount);
}
}
}
/// <summary>
/// Shuffles then slices 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Slice3Reduce(
ref ReadOnlySpan<byte> source,
ref Span<byte> dest,
byte control)
{
if (Ssse3.IsSupported)
{
int remainder = source.Length % (Vector128<byte>.Count * 4);
int sourceCount = source.Length - remainder;
int destCount = sourceCount * 3 / 4;
if (sourceCount > 0)
{
Shuffle4Slice3(
source.Slice(0, sourceCount),
dest.Slice(0, destCount),
control);
source = source.Slice(sourceCount);
dest = dest.Slice(destCount);
}
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle4(
ReadOnlySpan<float> source,
Span<float> dest,
byte control)
{
if (Avx.IsSupported)
{
ref Vector256<float> sourceBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
ref Vector256<float> destBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
int n = dest.Length / Vector256<float>.Count;
int m = ImageMaths.Modulo4(n);
int u = n - m;
for (int i = 0; i < u; i += 4)
{
ref Vector256<float> vd0 = ref Unsafe.Add(ref destBase, i);
ref Vector256<float> vs0 = ref Unsafe.Add(ref sourceBase, i);
vd0 = Avx.Permute(vs0, control);
Unsafe.Add(ref vd0, 1) = Avx.Permute(Unsafe.Add(ref vs0, 1), control);
Unsafe.Add(ref vd0, 2) = Avx.Permute(Unsafe.Add(ref vs0, 2), control);
Unsafe.Add(ref vd0, 3) = Avx.Permute(Unsafe.Add(ref vs0, 3), control);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control);
}
}
}
else
{
// Sse
ref Vector128<float> sourceBase =
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source));
ref Vector128<float> destBase =
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
int n = dest.Length / Vector128<float>.Count;
int m = ImageMaths.Modulo4(n);
int u = n - m;
for (int i = 0; i < u; i += 4)
{
ref Vector128<float> vd0 = ref Unsafe.Add(ref destBase, i);
ref Vector128<float> vs0 = ref Unsafe.Add(ref sourceBase, i);
vd0 = Sse.Shuffle(vs0, vs0, control);
Vector128<float> vs1 = Unsafe.Add(ref vs0, 1);
Unsafe.Add(ref vd0, 1) = Sse.Shuffle(vs1, vs1, control);
Vector128<float> vs2 = Unsafe.Add(ref vs0, 2);
Unsafe.Add(ref vd0, 2) = Sse.Shuffle(vs2, vs2, control);
Vector128<float> vs3 = Unsafe.Add(ref vs0, 3);
Unsafe.Add(ref vd0, 3) = Sse.Shuffle(vs3, vs3, control);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Vector128<float> vs = Unsafe.Add(ref sourceBase, i);
Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control);
}
}
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle4(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
{
if (Avx2.IsSupported)
{
// I've chosen to do this for convenience while we determine what
// shuffle controls to add to the library.
// We can add static ROS instances if need be in the future.
Span<byte> bytes = stackalloc byte[Vector256<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector256<byte> vshuffle = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector256<byte> sourceBase =
ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector256<byte> destBase =
ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
int n = dest.Length / Vector256<byte>.Count;
int m = ImageMaths.Modulo4(n);
int u = n - m;
for (int i = 0; i < u; i += 4)
{
ref Vector256<byte> vs0 = ref Unsafe.Add(ref sourceBase, i);
ref Vector256<byte> vd0 = ref Unsafe.Add(ref destBase, i);
vd0 = Avx2.Shuffle(vs0, vshuffle);
Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle);
Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle);
Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle);
}
}
}
else
{
// Ssse3
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector128<byte> destBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
int n = dest.Length / Vector128<byte>.Count;
int m = ImageMaths.Modulo4(n);
int u = n - m;
for (int i = 0; i < u; i += 4)
{
ref Vector128<byte> vs0 = ref Unsafe.Add(ref sourceBase, i);
ref Vector128<byte> vd0 = ref Unsafe.Add(ref destBase, i);
vd0 = Ssse3.Shuffle(vs0, vshuffle);
Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle);
Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle);
Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle);
}
}
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle3(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
{
if (Ssse3.IsSupported)
{
ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16);
Vector128<byte> vmask = Unsafe.As<byte, Vector128<byte>>(ref vmaskBase);
ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16);
Vector128<byte> vmasko = Unsafe.As<byte, Vector128<byte>>(ref vmaskoBase);
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector128<byte> destBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / Vector128<byte>.Count;
for (int i = 0; i < n; i += 3)
{
ref Vector128<byte> vs = ref Unsafe.Add(ref sourceBase, i);
Vector128<byte> v0 = vs;
Vector128<byte> v1 = Unsafe.Add(ref vs, 1);
Vector128<byte> v2 = Unsafe.Add(ref vs, 2);
Vector128<byte> v3 = Sse2.ShiftRightLogical128BitLane(v2, 4);
v2 = Ssse3.AlignRight(v2, v1, 8);
v1 = Ssse3.AlignRight(v1, v0, 12);
v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vmask), vshuffle);
v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vmask), vshuffle);
v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vmask), vshuffle);
v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vmask), vshuffle);
v0 = Ssse3.Shuffle(v0, vmaske);
v1 = Ssse3.Shuffle(v1, vmasko);
v2 = Ssse3.Shuffle(v2, vmaske);
v3 = Ssse3.Shuffle(v3, vmasko);
v0 = Ssse3.AlignRight(v1, v0, 4);
v3 = Ssse3.AlignRight(v3, v2, 12);
v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4);
v2 = Sse2.ShiftRightLogical128BitLane(v2, 4);
v1 = Ssse3.AlignRight(v2, v1, 8);
ref Vector128<byte> vd = ref Unsafe.Add(ref destBase, i);
vd = v0;
Unsafe.Add(ref vd, 1) = v1;
Unsafe.Add(ref vd, 2) = v3;
}
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void Pad3Shuffle4(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
{
if (Ssse3.IsSupported)
{
ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16);
Vector128<byte> vmask = Unsafe.As<byte, Vector128<byte>>(ref vmaskBase);
Vector128<byte> vfill = Vector128.Create(0xff000000ff000000ul).AsByte();
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector128<byte> destBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / Vector128<byte>.Count;
for (int i = 0, j = 0; i < n; i += 3, j += 4)
{
ref Vector128<byte> v0 = ref Unsafe.Add(ref sourceBase, i);
Vector128<byte> v1 = Unsafe.Add(ref v0, 1);
Vector128<byte> v2 = Unsafe.Add(ref v0, 2);
Vector128<byte> v3 = Sse2.ShiftRightLogical128BitLane(v2, 4);
v2 = Ssse3.AlignRight(v2, v1, 8);
v1 = Ssse3.AlignRight(v1, v0, 12);
ref Vector128<byte> vd = ref Unsafe.Add(ref destBase, j);
vd = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v0, vmask), vfill), vshuffle);
Unsafe.Add(ref vd, 1) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v1, vmask), vfill), vshuffle);
Unsafe.Add(ref vd, 2) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v2, vmask), vfill), vshuffle);
Unsafe.Add(ref vd, 3) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v3, vmask), vfill), vshuffle);
}
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle4Slice3(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
{
if (Ssse3.IsSupported)
{
ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16);
Vector128<byte> vmasko = Unsafe.As<byte, Vector128<byte>>(ref vmaskoBase);
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector128<byte> destBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / Vector128<byte>.Count;
for (int i = 0, j = 0; i < n; i += 4, j += 3)
{
ref Vector128<byte> vs = ref Unsafe.Add(ref sourceBase, i);
Vector128<byte> v0 = vs;
Vector128<byte> v1 = Unsafe.Add(ref vs, 1);
Vector128<byte> v2 = Unsafe.Add(ref vs, 2);
Vector128<byte> v3 = Unsafe.Add(ref vs, 3);
v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vshuffle), vmaske);
v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vshuffle), vmasko);
v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vshuffle), vmaske);
v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vshuffle), vmasko);
v0 = Ssse3.AlignRight(v1, v0, 4);
v3 = Ssse3.AlignRight(v3, v2, 12);
v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4);
v2 = Sse2.ShiftRightLogical128BitLane(v2, 4);
v1 = Ssse3.AlignRight(v2, v1, 8);
ref Vector128<byte> vd = ref Unsafe.Add(ref destBase, j);
vd = v0;
Unsafe.Add(ref vd, 1) = v1;
Unsafe.Add(ref vd, 2) = v3;
}
}
}
/// <summary>
/// Performs a multiplication and an addition of the <see cref="Vector256{T}"/>.
/// </summary>
/// <param name="va">The vector to add to the intermediate result.</param>
/// <param name="vm0">The first vector to multiply.</param>
/// <param name="vm1">The second vector to multiply.</param>
/// <returns>The <see cref="Vector256{T}"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static Vector256<float> MultiplyAdd(
in Vector256<float> va,
in Vector256<float> vm0,
in Vector256<float> vm1)
{
if (Fma.IsSupported)
{
return Fma.MultiplyAdd(vm1, vm0, va);
}
else
{
return Avx.Add(Avx.Multiply(vm0, vm1), va);
}
}
/// <summary>
/// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void ByteToNormalizedFloatReduce(
ref ReadOnlySpan<byte> source,
ref Span<float> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
if (Avx2.IsSupported || Sse2.IsSupported)
{
int remainder;
if (Avx2.IsSupported)
{
remainder = ImageMaths.ModuloP2(source.Length, Vector256<byte>.Count);
}
else
{
remainder = ImageMaths.ModuloP2(source.Length, Vector128<byte>.Count);
}
int adjustedCount = source.Length - remainder;
if (adjustedCount > 0)
{
ByteToNormalizedFloat(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount));
source = source.Slice(adjustedCount);
dest = dest.Slice(adjustedCount);
}
}
}
/// <summary>
/// Implementation <see cref="SimdUtils.ByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
/// </summary>
/// <remarks>
/// Implementation is based on MagicScaler code:
/// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L80-L182
/// </remarks>
internal static unsafe void ByteToNormalizedFloat(
ReadOnlySpan<byte> source,
Span<float> dest)
{
if (Avx2.IsSupported)
{
VerifySpanInput(source, dest, Vector256<byte>.Count);
int n = dest.Length / Vector256<byte>.Count;
byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source));
ref Vector256<float> destBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
var scale = Vector256.Create(1 / (float)byte.MaxValue);
for (int i = 0; i < n; i++)
{
int si = Vector256<byte>.Count * i;
Vector256<int> i0 = Avx2.ConvertToVector256Int32(sourceBase + si);
Vector256<int> i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256<int>.Count);
Vector256<int> i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 2));
Vector256<int> i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 3));
Vector256<float> f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0));
Vector256<float> f1 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i1));
Vector256<float> f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2));
Vector256<float> f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3));
ref Vector256<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
}
}
else
{
// Sse
VerifySpanInput(source, dest, Vector128<byte>.Count);
int n = dest.Length / Vector128<byte>.Count;
byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source));
ref Vector128<float> destBase =
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
var scale = Vector128.Create(1 / (float)byte.MaxValue);
Vector128<byte> zero = Vector128<byte>.Zero;
for (int i = 0; i < n; i++)
{
int si = Vector128<byte>.Count * i;
Vector128<int> i0, i1, i2, i3;
if (Sse41.IsSupported)
{
i0 = Sse41.ConvertToVector128Int32(sourceBase + si);
i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128<int>.Count);
i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 2));
i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 3));
}
else
{
Vector128<byte> b = Sse2.LoadVector128(sourceBase + si);
Vector128<short> s0 = Sse2.UnpackLow(b, zero).AsInt16();
Vector128<short> s1 = Sse2.UnpackHigh(b, zero).AsInt16();
i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32();
i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32();
i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32();
i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32();
}
Vector128<float> f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0));
Vector128<float> f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1));
Vector128<float> f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2));
Vector128<float> f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3));
ref Vector128<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
}
}
}
/// <summary>
/// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturateReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
if (Avx2.IsSupported || Sse2.IsSupported)
{
int remainder;
if (Avx2.IsSupported)
{
remainder = ImageMaths.ModuloP2(source.Length, Vector256<byte>.Count);
}
else
{
remainder = ImageMaths.ModuloP2(source.Length, Vector128<byte>.Count);
}
int adjustedCount = source.Length - remainder;
if (adjustedCount > 0)
{
NormalizedFloatToByteSaturate(
source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount));
source = source.Slice(adjustedCount);
dest = dest.Slice(adjustedCount);
}
}
}
/// <summary>
/// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
/// </summary>
/// <remarks>
/// Implementation is based on MagicScaler code:
/// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L541-L622
/// </remarks>
internal static void NormalizedFloatToByteSaturate(
ReadOnlySpan<float> source,
Span<byte> dest)
{
if (Avx2.IsSupported)
{
VerifySpanInput(source, dest, Vector256<byte>.Count);
int n = dest.Length / Vector256<byte>.Count;
ref Vector256<float> sourceBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
ref Vector256<byte> destBase =
ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
var scale = Vector256.Create((float)byte.MaxValue);
ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32);
Vector256<int> mask = Unsafe.As<byte, Vector256<int>>(ref maskBase);
for (int i = 0; i < n; i++)
{
ref Vector256<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
Vector256<float> f0 = Avx.Multiply(scale, s);
Vector256<float> f1 = Avx.Multiply(scale, Unsafe.Add(ref s, 1));
Vector256<float> f2 = Avx.Multiply(scale, Unsafe.Add(ref s, 2));
Vector256<float> f3 = Avx.Multiply(scale, Unsafe.Add(ref s, 3));
Vector256<int> w0 = Avx.ConvertToVector256Int32(f0);
Vector256<int> w1 = Avx.ConvertToVector256Int32(f1);
Vector256<int> w2 = Avx.ConvertToVector256Int32(f2);
Vector256<int> w3 = Avx.ConvertToVector256Int32(f3);
Vector256<short> u0 = Avx2.PackSignedSaturate(w0, w1);
Vector256<short> u1 = Avx2.PackSignedSaturate(w2, w3);
Vector256<byte> b = Avx2.PackUnsignedSaturate(u0, u1);
b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte();
Unsafe.Add(ref destBase, i) = b;
}
}
else
{
// Sse
VerifySpanInput(source, dest, Vector128<byte>.Count);
int n = dest.Length / Vector128<byte>.Count;
ref Vector128<float> sourceBase =
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source));
ref Vector128<byte> destBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
var scale = Vector128.Create((float)byte.MaxValue);
for (int i = 0; i < n; i++)
{
ref Vector128<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
Vector128<float> f0 = Sse.Multiply(scale, s);
Vector128<float> f1 = Sse.Multiply(scale, Unsafe.Add(ref s, 1));
Vector128<float> f2 = Sse.Multiply(scale, Unsafe.Add(ref s, 2));
Vector128<float> f3 = Sse.Multiply(scale, Unsafe.Add(ref s, 3));
Vector128<int> w0 = Sse2.ConvertToVector128Int32(f0);
Vector128<int> w1 = Sse2.ConvertToVector128Int32(f1);
Vector128<int> w2 = Sse2.ConvertToVector128Int32(f2);
Vector128<int> w3 = Sse2.ConvertToVector128Int32(f3);
Vector128<short> u0 = Sse2.PackSignedSaturate(w0, w1);
Vector128<short> u1 = Sse2.PackSignedSaturate(w2, w3);
Unsafe.Add(ref destBase, i) = Sse2.PackUnsignedSaturate(u0, u1);
}
}
}
}
}
}
#endif

275
src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

@ -0,0 +1,275 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
internal static partial class SimdUtils
{
/// <summary>
/// Shuffle single-precision (32-bit) floating-point elements in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of floats.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4(
ReadOnlySpan<float> source,
Span<float> dest,
byte control)
{
VerifyShuffle4SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control);
#endif
// Deal with the remainder:
if (source.Length > 0)
{
Shuffle4Remainder(source, dest, control);
}
}
/// <summary>
/// Shuffle 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="shuffle">The type of shuffle to perform.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4<TShuffle>(
ReadOnlySpan<byte> source,
Span<byte> dest,
TShuffle shuffle)
where TShuffle : struct, IShuffle4
{
VerifyShuffle4SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control);
#endif
// Deal with the remainder:
if (source.Length > 0)
{
shuffle.RunFallbackShuffle(source, dest);
}
}
/// <summary>
/// Shuffle 8-bit integer triplets within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="shuffle">The type of shuffle to perform.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle3<TShuffle>(
ReadOnlySpan<byte> source,
Span<byte> dest,
TShuffle shuffle)
where TShuffle : struct, IShuffle3
{
VerifyShuffle3SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.Shuffle3Reduce(ref source, ref dest, shuffle.Control);
#endif
// Deal with the remainder:
if (source.Length > 0)
{
shuffle.RunFallbackShuffle(source, dest);
}
}
/// <summary>
/// Pads then shuffles 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="shuffle">The type of shuffle to perform.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Pad3Shuffle4<TShuffle>(
ReadOnlySpan<byte> source,
Span<byte> dest,
TShuffle shuffle)
where TShuffle : struct, IPad3Shuffle4
{
VerifyPad3Shuffle4SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, shuffle.Control);
#endif
// Deal with the remainder:
if (source.Length > 0)
{
shuffle.RunFallbackShuffle(source, dest);
}
}
/// <summary>
/// Shuffles then slices 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="shuffle">The type of shuffle to perform.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Slice3<TShuffle>(
ReadOnlySpan<byte> source,
Span<byte> dest,
TShuffle shuffle)
where TShuffle : struct, IShuffle4Slice3
{
VerifyShuffle4Slice3SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, shuffle.Control);
#endif
// Deal with the remainder:
if (source.Length > 0)
{
shuffle.RunFallbackShuffle(source, dest);
}
}
private static void Shuffle4Remainder(
ReadOnlySpan<float> source,
Span<float> dest,
byte control)
{
ref float sBase = ref MemoryMarshal.GetReference(source);
ref float dBase = ref MemoryMarshal.GetReference(dest);
Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);
for (int i = 0; i < source.Length; i += 4)
{
Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i);
Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i);
}
}
[Conditional("DEBUG")]
private static void VerifyShuffle4SpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
where T : struct
{
DebugGuard.IsTrue(
source.Length == dest.Length,
nameof(source),
"Input spans must be of same length!");
DebugGuard.IsTrue(
source.Length % 4 == 0,
nameof(source),
"Input spans must be divisable by 4!");
}
[Conditional("DEBUG")]
private static void VerifyShuffle3SpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
where T : struct
{
DebugGuard.IsTrue(
source.Length == dest.Length,
nameof(source),
"Input spans must be of same length!");
DebugGuard.IsTrue(
source.Length % 3 == 0,
nameof(source),
"Input spans must be divisable by 3!");
}
[Conditional("DEBUG")]
private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan<byte> source, Span<byte> dest)
{
DebugGuard.IsTrue(
source.Length % 3 == 0,
nameof(source),
"Input span must be divisable by 3!");
DebugGuard.IsTrue(
dest.Length % 4 == 0,
nameof(dest),
"Output span must be divisable by 4!");
DebugGuard.IsTrue(
source.Length == dest.Length * 3 / 4,
nameof(source),
"Input span must be 3/4 the length of the output span!");
}
[Conditional("DEBUG")]
private static void VerifyShuffle4Slice3SpanInput(ReadOnlySpan<byte> source, Span<byte> dest)
{
DebugGuard.IsTrue(
source.Length % 4 == 0,
nameof(source),
"Input span must be divisable by 4!");
DebugGuard.IsTrue(
dest.Length % 3 == 0,
nameof(dest),
"Output span must be divisable by 3!");
DebugGuard.IsTrue(
dest.Length >= source.Length * 3 / 4,
nameof(source),
"Output span must be at least 3/4 the length of the input span!");
}
public static class Shuffle
{
[MethodImpl(InliningOptions.ShortMethod)]
public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0)
=> (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0);
[MethodImpl(InliningOptions.ShortMethod)]
public static void MmShuffleSpan(ref Span<byte> span, byte control)
{
InverseMmShuffle(
control,
out int p3,
out int p2,
out int p1,
out int p0);
ref byte spanBase = ref MemoryMarshal.GetReference(span);
for (int i = 0; i < span.Length; i += 4)
{
Unsafe.Add(ref spanBase, i) = (byte)(p0 + i);
Unsafe.Add(ref spanBase, i + 1) = (byte)(p1 + i);
Unsafe.Add(ref spanBase, i + 2) = (byte)(p2 + i);
Unsafe.Add(ref spanBase, i + 3) = (byte)(p3 + i);
}
}
[MethodImpl(InliningOptions.ShortMethod)]
public static void InverseMmShuffle(
byte control,
out int p3,
out int p2,
out int p1,
out int p0)
{
p3 = control >> 6 & 0x3;
p2 = control >> 4 & 0x3;
p1 = control >> 2 & 0x3;
p0 = control >> 0 & 0x3;
}
}
}
}

26
src/ImageSharp/Common/Helpers/SimdUtils.cs

@ -25,6 +25,25 @@ namespace SixLabors.ImageSharp
public static bool HasVector8 { get; } =
Vector.IsHardwareAccelerated && Vector<float>.Count == 8 && Vector<int>.Count == 8;
/// <summary>
/// Gets a value indicating whether <see cref="Vector{T}"/> code is being JIT-ed to SSE instructions
/// where float and integer registers are of size 128 byte.
/// </summary>
public static bool HasVector4 { get; } =
Vector.IsHardwareAccelerated && Vector<float>.Count == 4;
public static bool HasAvx2
{
get
{
#if SUPPORTS_RUNTIME_INTRINSICS
return Avx2.IsSupported;
#else
return false;
#endif
}
}
/// <summary>
/// Transform all scalars in 'v' in a way that converting them to <see cref="int"/> would have rounding semantics.
/// </summary>
@ -79,8 +98,9 @@ namespace SixLabors.ImageSharp
internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
#if SUPPORTS_EXTENDED_INTRINSICS
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest);
#elif SUPPORTS_EXTENDED_INTRINSICS
ExtendedIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest);
#else
BasicIntrinsics256.ByteToNormalizedFloatReduce(ref source, ref dest);
@ -110,7 +130,7 @@ namespace SixLabors.ImageSharp
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
#if SUPPORTS_RUNTIME_INTRINSICS
Avx2Intrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest);
HwIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest);
#elif SUPPORTS_EXTENDED_INTRINSICS
ExtendedIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest);
#else

79
src/ImageSharp/Common/Helpers/Vector4Utilities.cs

@ -5,6 +5,10 @@ using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp
{
@ -13,6 +17,9 @@ namespace SixLabors.ImageSharp
/// </summary>
internal static class Vector4Utilities
{
private const int BlendAlphaControl = 0b_10_00_10_00;
private const int ShuffleAlphaControl = 0b_11_11_11_11;
/// <summary>
/// Restricts a vector between a minimum and a maximum value.
/// 5x Faster then <see cref="Vector4.Clamp(Vector4, Vector4, Vector4)"/>.
@ -56,13 +63,39 @@ namespace SixLabors.ImageSharp
[MethodImpl(InliningOptions.ShortMethod)]
public static void Premultiply(Span<Vector4> vectors)
{
// TODO: This method can be AVX2 optimized using Vector<float>
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported && vectors.Length >= 2)
{
ref Vector256<float> vectorsBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
for (int i = 0; i < vectors.Length; i++)
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
{
Vector256<float> source = vectorsBase;
Vector256<float> multiply = Avx.Shuffle(source, source, ShuffleAlphaControl);
vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl);
vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
}
if (ImageMaths.Modulo2(vectors.Length) != 0)
{
// Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
Premultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1)));
}
}
else
#endif
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
Premultiply(ref v);
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
for (int i = 0; i < vectors.Length; i++)
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
Premultiply(ref v);
}
}
}
@ -73,13 +106,39 @@ namespace SixLabors.ImageSharp
[MethodImpl(InliningOptions.ShortMethod)]
public static void UnPremultiply(Span<Vector4> vectors)
{
// TODO: This method can be AVX2 optimized using Vector<float>
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported && vectors.Length >= 2)
{
ref Vector256<float> vectorsBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
for (int i = 0; i < vectors.Length; i++)
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
{
Vector256<float> source = vectorsBase;
Vector256<float> multiply = Avx.Shuffle(source, source, ShuffleAlphaControl);
vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl);
vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
}
if (ImageMaths.Modulo2(vectors.Length) != 0)
{
// Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
UnPremultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1)));
}
}
else
#endif
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
UnPremultiply(ref v);
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
for (int i = 0; i < vectors.Length; i++)
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
UnPremultiply(ref v);
}
}
}

20
src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs

@ -6,7 +6,6 @@ using System.Buffers;
using System.IO;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.Common.Helpers;
using SixLabors.ImageSharp.Memory;
@ -171,7 +170,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp
var fileHeader = new BmpFileHeader(
type: BmpConstants.TypeMarkers.Bitmap,
fileSize: BmpFileHeader.Size + infoHeaderSize + infoHeader.ImageSize,
fileSize: BmpFileHeader.Size + infoHeaderSize + colorPaletteSize + infoHeader.ImageSize,
reserved: 0,
offset: BmpFileHeader.Size + infoHeaderSize + colorPaletteSize);
@ -342,20 +341,11 @@ namespace SixLabors.ImageSharp.Formats.Bmp
using IndexedImageFrame<TPixel> quantized = frameQuantizer.BuildPaletteAndQuantizeFrame(image, image.Bounds());
ReadOnlySpan<TPixel> quantizedColors = quantized.Palette.Span;
var color = default(Rgba32);
// TODO: Use bulk conversion here for better perf
int idx = 0;
foreach (TPixel quantizedColor in quantizedColors)
PixelOperations<TPixel>.Instance.ToBgra32(this.configuration, quantizedColors, MemoryMarshal.Cast<byte, Bgra32>(colorPalette));
Span<uint> colorPaletteAsUInt = MemoryMarshal.Cast<byte, uint>(colorPalette);
for (int i = 0; i < colorPaletteAsUInt.Length; i++)
{
quantizedColor.ToRgba32(ref color);
colorPalette[idx] = color.B;
colorPalette[idx + 1] = color.G;
colorPalette[idx + 2] = color.R;
// Padding byte, always 0.
colorPalette[idx + 3] = 0;
idx += 4;
colorPaletteAsUInt[i] = colorPaletteAsUInt[i] & 0x00FFFFFF; // Padding byte, always 0.
}
stream.Write(colorPalette);

86
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs

@ -10,90 +10,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal partial struct Block8x8F
{
/// <summary>
/// Transpose the block into the destination block.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
{
d.V0L.X = V0L.X;
d.V1L.X = V0L.Y;
d.V2L.X = V0L.Z;
d.V3L.X = V0L.W;
d.V4L.X = V0R.X;
d.V5L.X = V0R.Y;
d.V6L.X = V0R.Z;
d.V7L.X = V0R.W;
d.V0L.Y = V1L.X;
d.V1L.Y = V1L.Y;
d.V2L.Y = V1L.Z;
d.V3L.Y = V1L.W;
d.V4L.Y = V1R.X;
d.V5L.Y = V1R.Y;
d.V6L.Y = V1R.Z;
d.V7L.Y = V1R.W;
d.V0L.Z = V2L.X;
d.V1L.Z = V2L.Y;
d.V2L.Z = V2L.Z;
d.V3L.Z = V2L.W;
d.V4L.Z = V2R.X;
d.V5L.Z = V2R.Y;
d.V6L.Z = V2R.Z;
d.V7L.Z = V2R.W;
d.V0L.W = V3L.X;
d.V1L.W = V3L.Y;
d.V2L.W = V3L.Z;
d.V3L.W = V3L.W;
d.V4L.W = V3R.X;
d.V5L.W = V3R.Y;
d.V6L.W = V3R.Z;
d.V7L.W = V3R.W;
d.V0R.X = V4L.X;
d.V1R.X = V4L.Y;
d.V2R.X = V4L.Z;
d.V3R.X = V4L.W;
d.V4R.X = V4R.X;
d.V5R.X = V4R.Y;
d.V6R.X = V4R.Z;
d.V7R.X = V4R.W;
d.V0R.Y = V5L.X;
d.V1R.Y = V5L.Y;
d.V2R.Y = V5L.Z;
d.V3R.Y = V5L.W;
d.V4R.Y = V5R.X;
d.V5R.Y = V5R.Y;
d.V6R.Y = V5R.Z;
d.V7R.Y = V5R.W;
d.V0R.Z = V6L.X;
d.V1R.Z = V6L.Y;
d.V2R.Z = V6L.Z;
d.V3R.Z = V6L.W;
d.V4R.Z = V6R.X;
d.V5R.Z = V6R.Y;
d.V6R.Z = V6R.Z;
d.V7R.Z = V6R.W;
d.V0R.W = V7L.X;
d.V1R.W = V7L.Y;
d.V2R.W = V7L.Z;
d.V3R.W = V7L.W;
d.V4R.W = V7R.X;
d.V5R.W = V7R.Y;
d.V6R.W = V7R.Z;
d.V7R.W = V7R.W;
}
/// <summary>
/// Level shift by +maximum/2, clip to [0, maximum]
/// </summary>
public void NormalizeColorsInplace(float maximum)
public void NormalizeColorsInPlace(float maximum)
{
var CMin4 = new Vector4(0F);
var CMax4 = new Vector4(maximum);
@ -118,10 +38,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
/// <summary>
/// AVX2-only variant for executing <see cref="NormalizeColorsInplace"/> and <see cref="RoundInplace"/> in one step.
/// AVX2-only variant for executing <see cref="NormalizeColorsInPlace"/> and <see cref="RoundInPlace"/> in one step.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public void NormalizeColorsAndRoundInplaceVector8(float maximum)
public void NormalizeColorsAndRoundInPlaceVector8(float maximum)
{
var off = new Vector<float>(MathF.Ceiling(maximum / 2));
var max = new Vector<float>(maximum);

38
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt

@ -23,42 +23,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal partial struct Block8x8F
{
/// <summary>
/// Transpose the block into the destination block.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
{
<#
PushIndent(" ");
for (int i = 0; i < 8; i++)
{
char destCoord = coordz[i % 4];
char destSide = (i / 4) % 2 == 0 ? 'L' : 'R';
for (int j = 0; j < 8; j++)
{
if(i > 0 && j == 0){
WriteLine("");
}
char srcCoord = coordz[j % 4];
char srcSide = (j / 4) % 2 == 0 ? 'L' : 'R';
var expression = $"d.V{j}{destSide}.{destCoord} = V{i}{srcSide}.{srcCoord};\r\n";
Write(expression);
}
}
PopIndent();
#>
}
/// <summary>
/// Level shift by +maximum/2, clip to [0, maximum]
/// </summary>
public void NormalizeColorsInplace(float maximum)
public void NormalizeColorsInPlace(float maximum)
{
var CMin4 = new Vector4(0F);
var CMax4 = new Vector4(maximum);
@ -81,10 +49,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
/// <summary>
/// AVX2-only variant for executing <see cref="NormalizeColorsInplace"/> and <see cref="RoundInplace"/> in one step.
/// AVX2-only variant for executing <see cref="NormalizeColorsInPlace"/> and <see cref="RoundInPlace"/> in one step.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public void NormalizeColorsAndRoundInplaceVector8(float maximum)
public void NormalizeColorsAndRoundInPlaceVector8(float maximum)
{
var off = new Vector<float>(MathF.Ceiling(maximum / 2));
var max = new Vector<float>(maximum);

359
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

@ -6,6 +6,10 @@ using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using System.Text;
// ReSharper disable InconsistentNaming
@ -277,73 +281,156 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// </summary>
/// <param name="value">The value to multiply by.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void MultiplyInplace(float value)
{
this.V0L *= value;
this.V0R *= value;
this.V1L *= value;
this.V1R *= value;
this.V2L *= value;
this.V2R *= value;
this.V3L *= value;
this.V3R *= value;
this.V4L *= value;
this.V4R *= value;
this.V5L *= value;
this.V5R *= value;
this.V6L *= value;
this.V6R *= value;
this.V7L *= value;
this.V7R *= value;
public void MultiplyInPlace(float value)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
var valueVec = Vector256.Create(value);
Unsafe.As<Vector4, Vector256<float>>(ref this.V0L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V0L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V1L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V1L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V2L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V2L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V3L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V3L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V4L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V4L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V5L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V5L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V6L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V6L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V7L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V7L), valueVec);
}
else
#endif
{
var valueVec = new Vector4(value);
this.V0L *= valueVec;
this.V0R *= valueVec;
this.V1L *= valueVec;
this.V1R *= valueVec;
this.V2L *= valueVec;
this.V2R *= valueVec;
this.V3L *= valueVec;
this.V3R *= valueVec;
this.V4L *= valueVec;
this.V4R *= valueVec;
this.V5L *= valueVec;
this.V5R *= valueVec;
this.V6L *= valueVec;
this.V6R *= valueVec;
this.V7L *= valueVec;
this.V7R *= valueVec;
}
}
/// <summary>
/// Multiply all elements of the block by the corresponding elements of 'other'.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public void MultiplyInplace(ref Block8x8F other)
{
this.V0L *= other.V0L;
this.V0R *= other.V0R;
this.V1L *= other.V1L;
this.V1R *= other.V1R;
this.V2L *= other.V2L;
this.V2R *= other.V2R;
this.V3L *= other.V3L;
this.V3R *= other.V3R;
this.V4L *= other.V4L;
this.V4R *= other.V4R;
this.V5L *= other.V5L;
this.V5R *= other.V5R;
this.V6L *= other.V6L;
this.V6R *= other.V6R;
this.V7L *= other.V7L;
this.V7R *= other.V7R;
public unsafe void MultiplyInPlace(ref Block8x8F other)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
Unsafe.As<Vector4, Vector256<float>>(ref this.V0L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V0L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V0L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V1L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V1L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V1L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V2L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V2L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V2L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V3L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V3L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V3L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V4L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V4L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V4L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V5L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V5L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V5L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V6L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V6L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V6L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V7L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V7L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V7L));
}
else
#endif
{
this.V0L *= other.V0L;
this.V0R *= other.V0R;
this.V1L *= other.V1L;
this.V1R *= other.V1R;
this.V2L *= other.V2L;
this.V2R *= other.V2R;
this.V3L *= other.V3L;
this.V3R *= other.V3R;
this.V4L *= other.V4L;
this.V4R *= other.V4R;
this.V5L *= other.V5L;
this.V5R *= other.V5R;
this.V6L *= other.V6L;
this.V6R *= other.V6R;
this.V7L *= other.V7L;
this.V7R *= other.V7R;
}
}
/// <summary>
/// Adds a vector to all elements of the block.
/// </summary>
/// <param name="diff">The added vector</param>
/// <param name="value">The added vector.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void AddToAllInplace(Vector4 diff)
{
this.V0L += diff;
this.V0R += diff;
this.V1L += diff;
this.V1R += diff;
this.V2L += diff;
this.V2R += diff;
this.V3L += diff;
this.V3R += diff;
this.V4L += diff;
this.V4R += diff;
this.V5L += diff;
this.V5R += diff;
this.V6L += diff;
this.V6R += diff;
this.V7L += diff;
this.V7R += diff;
public void AddInPlace(float value)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
var valueVec = Vector256.Create(value);
Unsafe.As<Vector4, Vector256<float>>(ref this.V0L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V0L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V1L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V1L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V2L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V2L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V3L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V3L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V4L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V4L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V5L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V5L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V6L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V6L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V7L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V7L), valueVec);
}
else
#endif
{
var valueVec = new Vector4(value);
this.V0L += valueVec;
this.V0R += valueVec;
this.V1L += valueVec;
this.V1R += valueVec;
this.V2L += valueVec;
this.V2R += valueVec;
this.V3L += valueVec;
this.V3R += valueVec;
this.V4L += valueVec;
this.V4R += valueVec;
this.V5L += valueVec;
this.V5R += valueVec;
this.V6L += valueVec;
this.V6R += valueVec;
this.V7L += valueVec;
this.V7R += valueVec;
}
}
/// <summary>
@ -464,23 +551,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <summary>
/// Level shift by +maximum/2, clip to [0..maximum], and round all the values in the block.
/// </summary>
public void NormalizeColorsAndRoundInplace(float maximum)
public void NormalizeColorsAndRoundInPlace(float maximum)
{
if (SimdUtils.HasVector8)
{
this.NormalizeColorsAndRoundInplaceVector8(maximum);
this.NormalizeColorsAndRoundInPlaceVector8(maximum);
}
else
{
this.NormalizeColorsInplace(maximum);
this.RoundInplace();
this.NormalizeColorsInPlace(maximum);
this.RoundInPlace();
}
}
/// <summary>
/// Rounds all values in the block.
/// </summary>
public void RoundInplace()
public void RoundInPlace()
{
for (int i = 0; i < Size; i++)
{
@ -596,5 +683,157 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
DebugGuard.MustBeLessThan(idx, Size, nameof(idx));
DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx));
}
/// <summary>
/// Transpose the block into the destination block.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
// https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536
Vector256<float> r0 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V0L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V4L),
1);
Vector256<float> r1 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V1L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V5L),
1);
Vector256<float> r2 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V2L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V6L),
1);
Vector256<float> r3 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V3L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V7L),
1);
Vector256<float> r4 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V4R),
1);
Vector256<float> r5 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V5R),
1);
Vector256<float> r6 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V6R),
1);
Vector256<float> r7 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V7R),
1);
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
Vector256<float> t2 = Avx.UnpackLow(r2, r3);
Vector256<float> v = Avx.Shuffle(t0, t2, 0x4E);
Unsafe.As<Vector4, Vector256<float>>(ref d.V0L) = Avx.Blend(t0, v, 0xCC);
Unsafe.As<Vector4, Vector256<float>>(ref d.V1L) = Avx.Blend(t2, v, 0x33);
Vector256<float> t4 = Avx.UnpackLow(r4, r5);
Vector256<float> t6 = Avx.UnpackLow(r6, r7);
v = Avx.Shuffle(t4, t6, 0x4E);
Unsafe.As<Vector4, Vector256<float>>(ref d.V4L) = Avx.Blend(t4, v, 0xCC);
Unsafe.As<Vector4, Vector256<float>>(ref d.V5L) = Avx.Blend(t6, v, 0x33);
Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
v = Avx.Shuffle(t1, t3, 0x4E);
Unsafe.As<Vector4, Vector256<float>>(ref d.V2L) = Avx.Blend(t1, v, 0xCC);
Unsafe.As<Vector4, Vector256<float>>(ref d.V3L) = Avx.Blend(t3, v, 0x33);
Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
Vector256<float> t7 = Avx.UnpackHigh(r6, r7);
v = Avx.Shuffle(t5, t7, 0x4E);
Unsafe.As<Vector4, Vector256<float>>(ref d.V6L) = Avx.Blend(t5, v, 0xCC);
Unsafe.As<Vector4, Vector256<float>>(ref d.V7L) = Avx.Blend(t7, v, 0x33);
}
else
#endif
{
d.V0L.X = this.V0L.X;
d.V1L.X = this.V0L.Y;
d.V2L.X = this.V0L.Z;
d.V3L.X = this.V0L.W;
d.V4L.X = this.V0R.X;
d.V5L.X = this.V0R.Y;
d.V6L.X = this.V0R.Z;
d.V7L.X = this.V0R.W;
d.V0L.Y = this.V1L.X;
d.V1L.Y = this.V1L.Y;
d.V2L.Y = this.V1L.Z;
d.V3L.Y = this.V1L.W;
d.V4L.Y = this.V1R.X;
d.V5L.Y = this.V1R.Y;
d.V6L.Y = this.V1R.Z;
d.V7L.Y = this.V1R.W;
d.V0L.Z = this.V2L.X;
d.V1L.Z = this.V2L.Y;
d.V2L.Z = this.V2L.Z;
d.V3L.Z = this.V2L.W;
d.V4L.Z = this.V2R.X;
d.V5L.Z = this.V2R.Y;
d.V6L.Z = this.V2R.Z;
d.V7L.Z = this.V2R.W;
d.V0L.W = this.V3L.X;
d.V1L.W = this.V3L.Y;
d.V2L.W = this.V3L.Z;
d.V3L.W = this.V3L.W;
d.V4L.W = this.V3R.X;
d.V5L.W = this.V3R.Y;
d.V6L.W = this.V3R.Z;
d.V7L.W = this.V3R.W;
d.V0R.X = this.V4L.X;
d.V1R.X = this.V4L.Y;
d.V2R.X = this.V4L.Z;
d.V3R.X = this.V4L.W;
d.V4R.X = this.V4R.X;
d.V5R.X = this.V4R.Y;
d.V6R.X = this.V4R.Z;
d.V7R.X = this.V4R.W;
d.V0R.Y = this.V5L.X;
d.V1R.Y = this.V5L.Y;
d.V2R.Y = this.V5L.Z;
d.V3R.Y = this.V5L.W;
d.V4R.Y = this.V5R.X;
d.V5R.Y = this.V5R.Y;
d.V6R.Y = this.V5R.Z;
d.V7R.Y = this.V5R.W;
d.V0R.Z = this.V6L.X;
d.V1R.Z = this.V6L.Y;
d.V2R.Z = this.V6L.Z;
d.V3R.Z = this.V6L.W;
d.V4R.Z = this.V6R.X;
d.V5R.Z = this.V6R.Y;
d.V6R.Z = this.V6R.Z;
d.V7R.Z = this.V6R.W;
d.V0R.W = this.V7L.X;
d.V1R.W = this.V7L.Y;
d.V2R.W = this.V7L.Z;
d.V3R.W = this.V7L.W;
d.V4R.W = this.V7R.X;
d.V5R.W = this.V7R.Y;
d.V6R.W = this.V7R.Z;
d.V7R.W = this.V7R.W;
}
}
}
}

18
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs

@ -0,0 +1,18 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal abstract class Avx2JpegColorConverter : VectorizedJpegColorConverter
{
protected Avx2JpegColorConverter(JpegColorSpace colorSpace, int precision)
: base(colorSpace, precision, 8)
{
}
protected sealed override bool IsAvailable => SimdUtils.HasAvx2;
}
}
}

18
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs

@ -0,0 +1,18 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal abstract class BasicJpegColorConverter : JpegColorConverter
{
protected BasicJpegColorConverter(JpegColorSpace colorSpace, int precision)
: base(colorSpace, precision)
{
}
protected override bool IsAvailable => true;
}
}
}

81
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs

@ -0,0 +1,81 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromCmykAvx2 : Avx2JpegColorConverter
{
public FromCmykAvx2(int precision)
: base(JpegColorSpace.Cmyk, precision)
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> cBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> mBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> yBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> kBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var scale = Vector256.Create(1 / this.MaximumValue);
var one = Vector256.Create(1F);
// Used for packing
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector256<float> k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol);
Vector256<float> c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol);
Vector256<float> m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol);
Vector256<float> y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol);
k = Avx.Multiply(k, scale);
c = Avx.Multiply(Avx.Multiply(c, k), scale);
m = Avx.Multiply(Avx.Multiply(m, k), scale);
y = Avx.Multiply(Avx.Multiply(y, k), scale);
Vector256<float> cmLo = Avx.UnpackLow(c, m);
Vector256<float> yoLo = Avx.UnpackLow(y, one);
Vector256<float> cmHi = Avx.UnpackHigh(c, m);
Vector256<float> yoHi = Avx.UnpackHigh(y, one);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00);
Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10);
Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00);
Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10);
}
#endif
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
}
}
}

16
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs → src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs

@ -8,16 +8,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromCmyk : JpegColorConverter
internal sealed class FromCmykBasic : BasicJpegColorConverter
{
public FromCmyk(int precision)
public FromCmykBasic(int precision)
: base(JpegColorSpace.Cmyk, precision)
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
// TODO: We can optimize a lot here with Vector<float> and SRCS.Unsafe()!
ConvertCore(values, result, this.MaximumValue);
}
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
{
ReadOnlySpan<float> cVals = values.Component0;
ReadOnlySpan<float> mVals = values.Component1;
ReadOnlySpan<float> yVals = values.Component2;
@ -25,7 +29,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
var v = new Vector4(0, 0, 0, 1F);
var maximum = 1 / this.MaximumValue;
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);
for (int i = 0; i < result.Length; i++)
@ -33,7 +37,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
float c = cVals[i];
float m = mVals[i];
float y = yVals[i];
float k = kVals[i] / this.MaximumValue;
float k = kVals[i] / maxValue;
v.X = c * k;
v.Y = m * k;
@ -47,4 +51,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
}
}
}
}
}

71
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs

@ -0,0 +1,71 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromCmykVector8 : Vector8JpegColorConverter
{
public FromCmykVector8(int precision)
: base(JpegColorSpace.Cmyk, precision)
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
ref Vector<float> cBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> mBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector<float> yBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector<float> kBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
Vector4Pair cc = default;
Vector4Pair mm = default;
Vector4Pair yy = default;
ref Vector<float> ccRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref cc);
ref Vector<float> mmRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref mm);
ref Vector<float> yyRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref yy);
var scale = new Vector<float>(1 / this.MaximumValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector<float> c = Unsafe.Add(ref cBase, i);
Vector<float> m = Unsafe.Add(ref mBase, i);
Vector<float> y = Unsafe.Add(ref yBase, i);
Vector<float> k = Unsafe.Add(ref kBase, i) * scale;
c = (c * k) * scale;
m = (m * k) * scale;
y = (y * k) * scale;
ccRefAsVector = c;
mmRefAsVector = m;
yyRefAsVector = y;
// Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref cc, ref mm, ref yy);
}
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromCmykBasic.ConvertCore(values, result, this.MaximumValue);
}
}
}

63
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs

@ -0,0 +1,63 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromGrayscaleAvx2 : Avx2JpegColorConverter
{
public FromGrayscaleAvx2(int precision)
: base(JpegColorSpace.Grayscale, precision)
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> gBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var scale = Vector256.Create(1 / this.MaximumValue);
var one = Vector256.Create(1F);
// Used for packing
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector256<float> g = Avx.Multiply(Unsafe.Add(ref gBase, i), scale);
g = Avx2.PermuteVar8x32(g, vcontrol);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000);
Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Shuffle(g, g, 0b01_01_01_01), one, 0b1000_1000);
Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Shuffle(g, g, 0b10_10_10_10), one, 0b1000_1000);
Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Shuffle(g, g, 0b11_11_11_11), one, 0b1000_1000);
}
#endif
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue);
}
}
}

13
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs → src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs

@ -10,16 +10,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromGrayscale : JpegColorConverter
internal sealed class FromGrayscaleBasic : BasicJpegColorConverter
{
public FromGrayscale(int precision)
public FromGrayscaleBasic(int precision)
: base(JpegColorSpace.Grayscale, precision)
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
var maximum = 1 / this.MaximumValue;
ConvertCore(values, result, this.MaximumValue);
}
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
{
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);
ref float sBase = ref MemoryMarshal.GetReference(values.Component0);
@ -35,4 +40,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
}
}
}
}
}

72
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs

@ -0,0 +1,72 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromRgbAvx2 : Avx2JpegColorConverter
{
public FromRgbAvx2(int precision)
: base(JpegColorSpace.RGB, precision)
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> rBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> gBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> bBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var scale = Vector256.Create(1 / this.MaximumValue);
var one = Vector256.Create(1F);
// Used for packing
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector256<float> r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale);
Vector256<float> g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale);
Vector256<float> b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale);
Vector256<float> rgLo = Avx.UnpackLow(r, g);
Vector256<float> boLo = Avx.UnpackLow(b, one);
Vector256<float> rgHi = Avx.UnpackHigh(r, g);
Vector256<float> boHi = Avx.UnpackHigh(b, one);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00);
Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10);
Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00);
Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10);
}
#endif
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromRgbBasic.ConvertCore(values, result, this.MaximumValue);
}
}
}

14
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs → src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs

@ -8,23 +8,27 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromRgb : JpegColorConverter
internal sealed class FromRgbBasic : BasicJpegColorConverter
{
public FromRgb(int precision)
public FromRgbBasic(int precision)
: base(JpegColorSpace.RGB, precision)
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
// TODO: We can optimize a lot here with Vector<float> and SRCS.Unsafe()!
ConvertCore(values, result, this.MaximumValue);
}
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue)
{
ReadOnlySpan<float> rVals = values.Component0;
ReadOnlySpan<float> gVals = values.Component1;
ReadOnlySpan<float> bVals = values.Component2;
var v = new Vector4(0, 0, 0, 1);
var maximum = 1 / this.MaximumValue;
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);
for (int i = 0; i < result.Length; i++)
@ -44,4 +48,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
}
}
}
}
}

67
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs

@ -0,0 +1,67 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromRgbVector8 : Vector8JpegColorConverter
{
public FromRgbVector8(int precision)
: base(JpegColorSpace.RGB, precision)
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
ref Vector<float> rBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> gBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector<float> bBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
Vector4Pair rr = default;
Vector4Pair gg = default;
Vector4Pair bb = default;
ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);
var scale = new Vector<float>(1 / this.MaximumValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
Vector<float> r = Unsafe.Add(ref rBase, i);
Vector<float> g = Unsafe.Add(ref gBase, i);
Vector<float> b = Unsafe.Add(ref bBase, i);
r *= scale;
g *= scale;
b *= scale;
rrRefAsVector = r;
ggRefAsVector = g;
bbRefAsVector = b;
// Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref rr, ref gg, ref bb);
}
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromRgbBasic.ConvertCore(values, result, this.MaximumValue);
}
}
}

101
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs

@ -0,0 +1,101 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif
// ReSharper disable ImpureMethodCallOnReadonlyValueField
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromYCbCrAvx2 : Avx2JpegColorConverter
{
public FromYCbCrAvx2(int precision)
: base(JpegColorSpace.YCbCr, precision)
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> yBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> cbBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> crBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var chromaOffset = Vector256.Create(-this.HalfValue);
var scale = Vector256.Create(1 / this.MaximumValue);
var rCrMult = Vector256.Create(1.402F);
var gCbMult = Vector256.Create(-0.344136F);
var gCrMult = Vector256.Create(-0.714136F);
var bCbMult = Vector256.Create(1.772F);
// Used for packing.
var va = Vector256.Create(1F);
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - 128F;
// cr = crVals[i] - 128F;
Vector256<float> y = Unsafe.Add(ref yBase, i);
Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset);
Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset);
y = Avx2.PermuteVar8x32(y, vcontrol);
cb = Avx2.PermuteVar8x32(cb, vcontrol);
cr = Avx2.PermuteVar8x32(cr, vcontrol);
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
// b = y + (1.772F * cb);
// Adding & multiplying 8 elements at one time:
Vector256<float> r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult);
Vector256<float> g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult);
Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult);
// TODO: We should be saving to RGBA not Vector4
r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale);
g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale);
b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale);
Vector256<float> vte = Avx.UnpackLow(r, b);
Vector256<float> vto = Avx.UnpackLow(g, va);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);
vte = Avx.UnpackHigh(r, b);
vto = Avx.UnpackHigh(g, va);
Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
}
#endif
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
}
}
}

4
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs

@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromYCbCrBasic : JpegColorConverter
internal sealed class FromYCbCrBasic : BasicJpegColorConverter
{
public FromYCbCrBasic(int precision)
: base(JpegColorSpace.YCbCr, precision)
@ -48,4 +48,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
}
}
}
}
}

53
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs → src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs

@ -5,37 +5,24 @@ using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromYCbCrSimd : JpegColorConverter
internal sealed class FromYCbCrVector4 : VectorizedJpegColorConverter
{
public FromYCbCrSimd(int precision)
: base(JpegColorSpace.YCbCr, precision)
public FromYCbCrVector4(int precision)
: base(JpegColorSpace.YCbCr, precision, 8)
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
int remainder = result.Length % 8;
int simdCount = result.Length - remainder;
if (simdCount > 0)
{
ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue);
}
protected override bool IsAvailable => SimdUtils.HasVector4;
FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue);
}
/// <summary>
/// SIMD convert using buffers of sizes divisible by 8.
/// </summary>
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
// TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector<T> is terrible?)
DebugGuard.IsTrue(result.Length % 8 == 0, nameof(result), "result.Length should be divisible by 8!");
ref Vector4Pair yBase =
@ -48,7 +35,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
var chromaOffset = new Vector4(-halfValue);
var chromaOffset = new Vector4(-this.HalfValue);
var maxValue = this.MaximumValue;
// Walking 8 elements at one step:
int n = result.Length / 8;
@ -87,31 +75,18 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
tmp.MultiplyInplace(1.772F);
b.AddInplace(ref tmp);
if (Vector<float>.Count == 4)
{
// TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector<T> is terrible?)
r.RoundAndDownscalePreVector8(maxValue);
g.RoundAndDownscalePreVector8(maxValue);
b.RoundAndDownscalePreVector8(maxValue);
}
else if (SimdUtils.HasVector8)
{
r.RoundAndDownscaleVector8(maxValue);
g.RoundAndDownscaleVector8(maxValue);
b.RoundAndDownscaleVector8(maxValue);
}
else
{
// TODO: Run fallback scalar code here
// However, no issues expected before someone implements this: https://github.com/dotnet/coreclr/issues/12007
JpegThrowHelper.ThrowNotImplementedException("Your CPU architecture is too modern!");
}
r.RoundAndDownscalePreVector8(maxValue);
g.RoundAndDownscalePreVector8(maxValue);
b.RoundAndDownscalePreVector8(maxValue);
// Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref r, ref g, ref b);
}
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
}
}
}

41
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs → src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs

@ -1,11 +1,10 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;
// ReSharper disable ImpureMethodCallOnReadonlyValueField
@ -13,40 +12,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromYCbCrSimdVector8 : JpegColorConverter
internal sealed class FromYCbCrVector8 : Vector8JpegColorConverter
{
public FromYCbCrSimdVector8(int precision)
public FromYCbCrVector8(int precision)
: base(JpegColorSpace.YCbCr, precision)
{
}
public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
int remainder = result.Length % 8;
int simdCount = result.Length - remainder;
if (simdCount > 0)
{
ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue);
}
FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue);
}
/// <summary>
/// SIMD convert using buffers of sizes divisible by 8.
/// </summary>
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
// This implementation is actually AVX specific.
// An AVX register is capable of storing 8 float-s.
if (!IsAvailable)
{
throw new InvalidOperationException(
"JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!");
}
ref Vector<float> yBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> cbBase =
@ -57,7 +31,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
var chromaOffset = new Vector<float>(-halfValue);
var chromaOffset = new Vector<float>(-this.HalfValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
@ -70,7 +44,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);
var scale = new Vector<float>(1 / maxValue);
var scale = new Vector<float>(1 / this.MaximumValue);
for (int i = 0; i < n; i++)
{
@ -105,6 +79,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
destination.Pack(ref rr, ref gg, ref bb);
}
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
}
}
}

110
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs

@ -0,0 +1,110 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static SixLabors.ImageSharp.SimdUtils;
#endif
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromYccKAvx2 : Avx2JpegColorConverter
{
public FromYccKAvx2(int precision)
: base(JpegColorSpace.Ycck, precision)
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
#if SUPPORTS_RUNTIME_INTRINSICS
ref Vector256<float> yBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector256<float> cbBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector256<float> crBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector256<float> kBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(values.Component3));
ref Vector256<float> resultBase =
ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(result));
// Used for the color conversion
var chromaOffset = Vector256.Create(-this.HalfValue);
var scale = Vector256.Create(1 / this.MaximumValue);
var max = Vector256.Create(this.MaximumValue);
var rCrMult = Vector256.Create(1.402F);
var gCbMult = Vector256.Create(-0.344136F);
var gCrMult = Vector256.Create(-0.714136F);
var bCbMult = Vector256.Create(1.772F);
// Used for packing.
var va = Vector256.Create(1F);
ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32);
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
// Walking 8 elements at one step:
int n = result.Length / 8;
for (int i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - 128F;
// cr = crVals[i] - 128F;
// k = kVals[i] / 256F;
Vector256<float> y = Unsafe.Add(ref yBase, i);
Vector256<float> cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset);
Vector256<float> cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset);
Vector256<float> k = Avx.Divide(Unsafe.Add(ref kBase, i), max);
y = Avx2.PermuteVar8x32(y, vcontrol);
cb = Avx2.PermuteVar8x32(cb, vcontrol);
cr = Avx2.PermuteVar8x32(cr, vcontrol);
k = Avx2.PermuteVar8x32(k, vcontrol);
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
// b = y + (1.772F * cb);
// Adding & multiplying 8 elements at one time:
Vector256<float> r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult);
Vector256<float> g =
HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult);
Vector256<float> b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult);
r = Avx.Subtract(max, Avx.RoundToNearestInteger(r));
g = Avx.Subtract(max, Avx.RoundToNearestInteger(g));
b = Avx.Subtract(max, Avx.RoundToNearestInteger(b));
r = Avx.Multiply(Avx.Multiply(r, k), scale);
g = Avx.Multiply(Avx.Multiply(g, k), scale);
b = Avx.Multiply(Avx.Multiply(b, k), scale);
Vector256<float> vte = Avx.UnpackLow(r, b);
Vector256<float> vto = Avx.UnpackLow(g, va);
ref Vector256<float> destination = ref Unsafe.Add(ref resultBase, i * 4);
destination = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto);
vte = Avx.UnpackHigh(r, b);
vto = Avx.UnpackHigh(g, va);
Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto);
Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto);
}
#endif
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
}
}
}

25
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs → src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs

@ -8,14 +8,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromYccK : JpegColorConverter
internal sealed class FromYccKBasic : BasicJpegColorConverter
{
public FromYccK(int precision)
public FromYccKBasic(int precision)
: base(JpegColorSpace.Ycck, precision)
{
}
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
ConvertCore(values, result, this.MaximumValue, this.HalfValue);
}
internal static void ConvertCore(in ComponentValues values, Span<Vector4> result, float maxValue, float halfValue)
{
// TODO: We can optimize a lot here with Vector<float> and SRCS.Unsafe()!
ReadOnlySpan<float> yVals = values.Component0;
@ -25,19 +30,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
var v = new Vector4(0, 0, 0, 1F);
var maximum = 1 / this.MaximumValue;
var maximum = 1 / maxValue;
var scale = new Vector4(maximum, maximum, maximum, 1F);
for (int i = 0; i < result.Length; i++)
{
float y = yVals[i];
float cb = cbVals[i] - this.HalfValue;
float cr = crVals[i] - this.HalfValue;
float k = kVals[i] / this.MaximumValue;
float cb = cbVals[i] - halfValue;
float cr = crVals[i] - halfValue;
float k = kVals[i] / maxValue;
v.X = (this.MaximumValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k;
v.Y = (this.MaximumValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k;
v.Z = (this.MaximumValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k;
v.X = (maxValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k;
v.Y = (maxValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k;
v.Z = (maxValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k;
v.W = 1F;
v *= scale;
@ -47,4 +52,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
}
}
}
}
}

91
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs

@ -0,0 +1,91 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal sealed class FromYccKVector8 : Vector8JpegColorConverter
{
public FromYccKVector8(int precision)
: base(JpegColorSpace.Ycck, precision)
{
}
protected override void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result)
{
ref Vector<float> yBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component0));
ref Vector<float> cbBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component1));
ref Vector<float> crBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component2));
ref Vector<float> kBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(values.Component3));
ref Vector4Octet resultBase =
ref Unsafe.As<Vector4, Vector4Octet>(ref MemoryMarshal.GetReference(result));
var chromaOffset = new Vector<float>(-this.HalfValue);
// Walking 8 elements at one step:
int n = result.Length / 8;
Vector4Pair rr = default;
Vector4Pair gg = default;
Vector4Pair bb = default;
ref Vector<float> rrRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref rr);
ref Vector<float> ggRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref gg);
ref Vector<float> bbRefAsVector = ref Unsafe.As<Vector4Pair, Vector<float>>(ref bb);
var scale = new Vector<float>(1 / this.MaximumValue);
var max = new Vector<float>(this.MaximumValue);
for (int i = 0; i < n; i++)
{
// y = yVals[i];
// cb = cbVals[i] - 128F;
// cr = crVals[i] - 128F;
// k = kVals[i] / 256F;
Vector<float> y = Unsafe.Add(ref yBase, i);
Vector<float> cb = Unsafe.Add(ref cbBase, i) + chromaOffset;
Vector<float> cr = Unsafe.Add(ref crBase, i) + chromaOffset;
Vector<float> k = Unsafe.Add(ref kBase, i) / max;
// r = y + (1.402F * cr);
// g = y - (0.344136F * cb) - (0.714136F * cr);
// b = y + (1.772F * cb);
// Adding & multiplying 8 elements at one time:
Vector<float> r = y + (cr * new Vector<float>(1.402F));
Vector<float> g = y - (cb * new Vector<float>(0.344136F)) - (cr * new Vector<float>(0.714136F));
Vector<float> b = y + (cb * new Vector<float>(1.772F));
r = (max - r.FastRound()) * k;
g = (max - g.FastRound()) * k;
b = (max - b.FastRound()) * k;
r *= scale;
g *= scale;
b *= scale;
rrRefAsVector = r;
ggRefAsVector = g;
bbRefAsVector = b;
// Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Pack(ref rr, ref gg, ref bb);
}
}
protected override void ConvertCore(in ComponentValues values, Span<Vector4> result) =>
FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue);
}
}
}

18
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs

@ -0,0 +1,18 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal abstract class Vector8JpegColorConverter : VectorizedJpegColorConverter
{
protected Vector8JpegColorConverter(JpegColorSpace colorSpace, int precision)
: base(colorSpace, precision, 8)
{
}
protected sealed override bool IsAvailable => SimdUtils.HasVector8;
}
}
}

46
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs

@ -0,0 +1,46 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
internal abstract partial class JpegColorConverter
{
internal abstract class VectorizedJpegColorConverter : JpegColorConverter
{
private readonly int vectorSize;
protected VectorizedJpegColorConverter(JpegColorSpace colorSpace, int precision, int vectorSize)
: base(colorSpace, precision)
{
this.vectorSize = vectorSize;
}
public sealed override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{
int remainder = result.Length % this.vectorSize;
int simdCount = result.Length - remainder;
if (simdCount > 0)
{
// This implementation is actually AVX specific.
// An AVX register is capable of storing 8 float-s.
if (!this.IsAvailable)
{
throw new InvalidOperationException(
"This converter can be used only on architecture having 256 byte floating point SIMD registers!");
}
this.ConvertCoreVectorized(values.Slice(0, simdCount), result.Slice(0, simdCount));
}
this.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder));
}
protected abstract void ConvertCoreVectorized(in ComponentValues values, Span<Vector4> result);
protected abstract void ConvertCore(in ComponentValues values, Span<Vector4> result);
}
}
}

161
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs

@ -3,8 +3,8 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.Tuples;
@ -18,22 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
/// <summary>
/// The available converters
/// </summary>
private static readonly JpegColorConverter[] Converters =
{
// 8-bit converters
GetYCbCrConverter(8),
new FromYccK(8),
new FromCmyk(8),
new FromGrayscale(8),
new FromRgb(8),
// 12-bit converters
GetYCbCrConverter(12),
new FromYccK(12),
new FromCmyk(12),
new FromGrayscale(12),
new FromRgb(12),
};
private static readonly JpegColorConverter[] Converters = CreateConverters();
/// <summary>
/// Initializes a new instance of the <see cref="JpegColorConverter"/> class.
@ -46,6 +31,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
this.HalfValue = MathF.Ceiling(this.MaximumValue / 2);
}
/// <summary>
/// Gets a value indicating whether this <see cref="JpegColorConverter"/> is available
/// on the current runtime and CPU architecture.
/// </summary>
protected abstract bool IsAvailable { get; }
/// <summary>
/// Gets the <see cref="JpegColorSpace"/> of this converter.
/// </summary>
@ -71,8 +62,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
/// </summary>
public static JpegColorConverter GetConverter(JpegColorSpace colorSpace, int precision)
{
JpegColorConverter converter = Array.Find(Converters, c => c.ColorSpace == colorSpace
&& c.Precision == precision);
JpegColorConverter converter = Array.Find(
Converters,
c => c.ColorSpace == colorSpace
&& c.Precision == precision);
if (converter is null)
{
@ -90,10 +83,88 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
public abstract void ConvertToRgba(in ComponentValues values, Span<Vector4> result);
/// <summary>
/// Returns the <see cref="JpegColorConverter"/> for the YCbCr colorspace that matches the current CPU architecture.
/// Returns the <see cref="JpegColorConverter"/>s for all supported colorspaces and precisions.
/// </summary>
private static JpegColorConverter[] CreateConverters()
{
var converters = new List<JpegColorConverter>();
// 8-bit converters
converters.AddRange(GetYCbCrConverters(8));
converters.AddRange(GetYccKConverters(8));
converters.AddRange(GetCmykConverters(8));
converters.AddRange(GetGrayScaleConverters(8));
converters.AddRange(GetRgbConverters(8));
// 12-bit converters
converters.AddRange(GetYCbCrConverters(12));
converters.AddRange(GetYccKConverters(12));
converters.AddRange(GetCmykConverters(12));
converters.AddRange(GetGrayScaleConverters(12));
converters.AddRange(GetRgbConverters(12));
return converters.Where(x => x.IsAvailable).ToArray();
}
/// <summary>
/// Returns the <see cref="JpegColorConverter"/>s for the YCbCr colorspace.
/// </summary>
private static IEnumerable<JpegColorConverter> GetYCbCrConverters(int precision)
{
#if SUPPORTS_RUNTIME_INTRINSICS
yield return new FromYCbCrAvx2(precision);
#endif
yield return new FromYCbCrVector8(precision);
yield return new FromYCbCrVector4(precision);
yield return new FromYCbCrBasic(precision);
}
/// <summary>
/// Returns the <see cref="JpegColorConverter"/>s for the YccK colorspace.
/// </summary>
private static IEnumerable<JpegColorConverter> GetYccKConverters(int precision)
{
#if SUPPORTS_RUNTIME_INTRINSICS
yield return new FromYccKAvx2(precision);
#endif
yield return new FromYccKVector8(precision);
yield return new FromYccKBasic(precision);
}
/// <summary>
/// Returns the <see cref="JpegColorConverter"/>s for the CMYK colorspace.
/// </summary>
private static IEnumerable<JpegColorConverter> GetCmykConverters(int precision)
{
#if SUPPORTS_RUNTIME_INTRINSICS
yield return new FromCmykAvx2(precision);
#endif
yield return new FromCmykVector8(precision);
yield return new FromCmykBasic(precision);
}
/// <summary>
/// Returns the <see cref="JpegColorConverter"/>s for the gray scale colorspace.
/// </summary>
private static IEnumerable<JpegColorConverter> GetGrayScaleConverters(int precision)
{
#if SUPPORTS_RUNTIME_INTRINSICS
yield return new FromGrayscaleAvx2(precision);
#endif
yield return new FromGrayscaleBasic(precision);
}
/// <summary>
/// Returns the <see cref="JpegColorConverter"/>s for the RGB colorspace.
/// </summary>
private static JpegColorConverter GetYCbCrConverter(int precision) =>
FromYCbCrSimdVector8.IsAvailable ? (JpegColorConverter)new FromYCbCrSimdVector8(precision) : new FromYCbCrSimd(precision);
private static IEnumerable<JpegColorConverter> GetRgbConverters(int precision)
{
#if SUPPORTS_RUNTIME_INTRINSICS
yield return new FromRgbAvx2(precision);
#endif
yield return new FromRgbVector8(precision);
yield return new FromRgbBasic(precision);
}
/// <summary>
/// A stack-only struct to reference the input buffers using <see cref="ReadOnlySpan{T}"/>-s.
@ -230,6 +301,52 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
this.V7.Z = b.B.W;
this.V7.W = 1f;
}
/// <summary>
/// Pack (g0,g1...g7) vector values as (g0,g0,g0,1), (g1,g1,g1,1) ...
/// </summary>
public void Pack(ref Vector4Pair g)
{
this.V0.X = g.A.X;
this.V0.Y = g.A.X;
this.V0.Z = g.A.X;
this.V0.W = 1f;
this.V1.X = g.A.Y;
this.V1.Y = g.A.Y;
this.V1.Z = g.A.Y;
this.V1.W = 1f;
this.V2.X = g.A.Z;
this.V2.Y = g.A.Z;
this.V2.Z = g.A.Z;
this.V2.W = 1f;
this.V3.X = g.A.W;
this.V3.Y = g.A.W;
this.V3.Z = g.A.W;
this.V3.W = 1f;
this.V4.X = g.B.X;
this.V4.Y = g.B.X;
this.V4.Z = g.B.X;
this.V4.W = 1f;
this.V5.X = g.B.Y;
this.V5.Y = g.B.Y;
this.V5.Z = g.B.Y;
this.V5.W = 1f;
this.V6.X = g.B.Z;
this.V6.Y = g.B.Z;
this.V6.Z = g.B.Z;
this.V6.W = 1f;
this.V7.X = g.B.W;
this.V7.Y = g.B.W;
this.V7.Z = g.B.W;
this.V7.W = 1f;
}
}
}
}

4
src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs

@ -81,14 +81,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
b.LoadFrom(ref sourceBlock);
// Dequantize:
b.MultiplyInplace(ref this.DequantiazationTable);
b.MultiplyInPlace(ref this.DequantiazationTable);
FastFloatingPointDCT.TransformIDCT(ref b, ref this.WorkspaceBlock1, ref this.WorkspaceBlock2);
// To conform better to libjpeg we actually NEED TO loose precision here.
// This is because they store blocks as Int16 between all the operations.
// To be "more accurate", we need to emulate this by rounding!
this.WorkspaceBlock1.NormalizeColorsAndRoundInplace(maximumValue);
this.WorkspaceBlock1.NormalizeColorsAndRoundInPlace(maximumValue);
this.WorkspaceBlock1.ScaledCopyTo(
ref destAreaOrigin,

12
src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs

@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Numerics;
@ -50,8 +50,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <param name="temp">Temporary block provided by the caller</param>
public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp)
{
// TODO: Transpose is a bottleneck now. We need full AVX support to optimize it:
// https://github.com/dotnet/corefx/issues/22940
src.TransposeInto(ref temp);
IDCT8x4_LeftPart(ref temp, ref dest);
@ -63,7 +61,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
IDCT8x4_RightPart(ref temp, ref dest);
// TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing?
dest.MultiplyInplace(C_0_125);
dest.MultiplyInPlace(C_0_125);
}
/// <summary>
@ -326,7 +324,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
src.TransposeInto(ref temp);
if (offsetSourceByNeg128)
{
temp.AddToAllInplace(new Vector4(-128));
temp.AddInPlace(-128F);
}
FDCT8x4_LeftPart(ref temp, ref dest);
@ -337,7 +335,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
FDCT8x4_LeftPart(ref temp, ref dest);
FDCT8x4_RightPart(ref temp, ref dest);
dest.MultiplyInplace(C_0_125);
dest.MultiplyInPlace(C_0_125);
}
}
}
}

22
src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs

@ -35,6 +35,15 @@ namespace SixLabors.ImageSharp.Formats.Png
options.ColorType ??= pngMetadata.ColorType ?? SuggestColorType<TPixel>();
options.BitDepth ??= pngMetadata.BitDepth ?? SuggestBitDepth<TPixel>();
// Ensure bit depth and color type are a supported combination.
// Bit8 is the only bit depth supported by all color types.
byte bits = (byte)options.BitDepth;
byte[] validBitDepths = PngConstants.ColorTypes[options.ColorType.Value];
if (Array.IndexOf(validBitDepths, bits) == -1)
{
options.BitDepth = PngBitDepth.Bit8;
}
options.InterlaceMethod ??= pngMetadata.InterlaceMethod;
use16Bit = options.BitDepth == PngBitDepth.Bit16;
@ -44,12 +53,6 @@ namespace SixLabors.ImageSharp.Formats.Png
{
options.ChunkFilter = PngChunkFilter.ExcludeAll;
}
// Ensure we are not allowing impossible combinations.
if (!PngConstants.ColorTypes.ContainsKey(options.ColorType.Value))
{
throw new NotSupportedException("Color type is not supported or not valid.");
}
}
/// <summary>
@ -68,15 +71,10 @@ namespace SixLabors.ImageSharp.Formats.Png
return null;
}
byte bits = (byte)options.BitDepth;
if (Array.IndexOf(PngConstants.ColorTypes[options.ColorType.Value], bits) == -1)
{
throw new NotSupportedException("Bit depth is not supported or not valid.");
}
// Use the metadata to determine what quantization depth to use if no quantizer has been set.
if (options.Quantizer is null)
{
byte bits = (byte)options.BitDepth;
var maxColors = ImageMaths.GetColorCountForBitDepth(bits);
options.Quantizer = new WuQuantizer(new QuantizerOptions { MaxColors = maxColors });
}

10
src/ImageSharp/Image{TPixel}.cs

@ -201,14 +201,14 @@ namespace SixLabors.ImageSharp
public bool TryGetSinglePixelSpan(out Span<TPixel> span)
{
IMemoryGroup<TPixel> mg = this.GetPixelMemoryGroup();
if (mg.Count > 1)
if (mg.Count == 1)
{
span = default;
return false;
span = mg[0].Span;
return true;
}
span = mg.Single().Span;
return true;
span = default;
return false;
}
/// <summary>

134
src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs

@ -53,84 +53,112 @@ namespace SixLabors.ImageSharp.PixelFormats
Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale));
}
/// <inheritdoc />
public override void ToRgba32(Configuration configuration, ReadOnlySpan<Argb32> sourcePixels, Span<Rgba32> destinationPixels)
public override void ToRgba32(
Configuration configuration,
ReadOnlySpan<Argb32> sourcePixels,
Span<Rgba32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<Argb32,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<Rgba32, uint>(ref MemoryMarshal.GetReference(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToRgba32(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Argb32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(destinationPixels);
PixelConverter.FromArgb32.ToRgba32(source, dest);
}
/// <inheritdoc />
public override void FromRgba32(Configuration configuration, ReadOnlySpan<Rgba32> sourcePixels, Span<Argb32> destinationPixels)
public override void FromRgba32(
Configuration configuration,
ReadOnlySpan<Rgba32> sourcePixels,
Span<Argb32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<Rgba32,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<Argb32, uint>(ref MemoryMarshal.GetReference(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToArgb32(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgba32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Argb32, byte>(destinationPixels);
PixelConverter.FromRgba32.ToArgb32(source, dest);
}
/// <inheritdoc />
public override void ToBgra32(Configuration configuration, ReadOnlySpan<Argb32> sourcePixels, Span<Bgra32> destinationPixels)
public override void ToBgra32(
Configuration configuration,
ReadOnlySpan<Argb32> sourcePixels,
Span<Bgra32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<Argb32,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<Bgra32, uint>(ref MemoryMarshal.GetReference(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToBgra32(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Argb32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgra32, byte>(destinationPixels);
PixelConverter.FromArgb32.ToBgra32(source, dest);
}
/// <inheritdoc />
public override void FromBgra32(Configuration configuration, ReadOnlySpan<Bgra32> sourcePixels, Span<Argb32> destinationPixels)
public override void FromBgra32(
Configuration configuration,
ReadOnlySpan<Bgra32> sourcePixels,
Span<Argb32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<Bgra32,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<Argb32, uint>(ref MemoryMarshal.GetReference(destinationPixels));
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgra32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Argb32, byte>(destinationPixels);
PixelConverter.FromBgra32.ToArgb32(source, dest);
}
/// <inheritdoc />
public override void ToRgb24(
Configuration configuration,
ReadOnlySpan<Argb32> sourcePixels,
Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToArgb32(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Argb32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgb24, byte>(destinationPixels);
PixelConverter.FromArgb32.ToRgb24(source, dest);
}
/// <inheritdoc />
public override void ToBgr24(Configuration configuration, ReadOnlySpan<Argb32> sourcePixels, Span<Bgr24> destinationPixels)
public override void FromRgb24(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Argb32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Argb32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Argb32, byte>(destinationPixels);
PixelConverter.FromRgb24.ToArgb32(source, dest);
}
/// <inheritdoc />
public override void ToBgr24(
Configuration configuration,
ReadOnlySpan<Argb32> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Argb32 sp = ref Unsafe.Add(ref sourceRef, i);
ref Bgr24 dp = ref Unsafe.Add(ref destRef, i);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Argb32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromArgb32.ToBgr24(source, dest);
}
dp.FromArgb32(sp);
}
/// <inheritdoc />
public override void FromBgr24(
Configuration configuration,
ReadOnlySpan<Bgr24> sourcePixels,
Span<Argb32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgr24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Argb32, byte>(destinationPixels);
PixelConverter.FromBgr24.ToArgb32(source, dest);
}
/// <inheritdoc />
@ -205,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats
}
}
/// <inheritdoc />
public override void ToRgb24(Configuration configuration, ReadOnlySpan<Argb32> sourcePixels, Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Argb32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels);
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Argb32 sp = ref Unsafe.Add(ref sourceRef, i);
ref Rgb24 dp = ref Unsafe.Add(ref destRef, i);
dp.FromArgb32(sp);
}
}
/// <inheritdoc />
public override void ToRgb48(Configuration configuration, ReadOnlySpan<Argb32> sourcePixels, Span<Rgb48> destinationPixels)
{

140
src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs

@ -52,146 +52,182 @@ namespace SixLabors.ImageSharp.PixelFormats
{
Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply));
}
/// <inheritdoc />
public override void ToArgb32(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<Argb32> destinationPixels)
public override void ToRgba32(
Configuration configuration,
ReadOnlySpan<Bgr24> sourcePixels,
Span<Rgba32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgr24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(destinationPixels);
PixelConverter.FromBgr24.ToRgba32(source, dest);
}
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Argb32 dp = ref Unsafe.Add(ref destRef, i);
/// <inheritdoc />
public override void FromRgba32(
Configuration configuration,
ReadOnlySpan<Rgba32> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
dp.FromBgr24(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgba32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromRgba32.ToBgr24(source, dest);
}
/// <inheritdoc />
public override void ToBgra32(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<Bgra32> destinationPixels)
public override void ToArgb32(
Configuration configuration,
ReadOnlySpan<Bgr24> sourcePixels,
Span<Argb32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgr24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Argb32, byte>(destinationPixels);
PixelConverter.FromBgr24.ToArgb32(source, dest);
}
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Bgra32 dp = ref Unsafe.Add(ref destRef, i);
/// <inheritdoc />
public override void FromArgb32(
Configuration configuration,
ReadOnlySpan<Argb32> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
dp.FromBgr24(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Argb32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromArgb32.ToBgr24(source, dest);
}
/// <inheritdoc />
public override void ToL8(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<L8> destinationPixels)
public override void ToBgra32(
Configuration configuration,
ReadOnlySpan<Bgr24> sourcePixels,
Span<Bgra32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgr24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgra32, byte>(destinationPixels);
PixelConverter.FromBgr24.ToBgra32(source, dest);
}
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref L8 dp = ref Unsafe.Add(ref destRef, i);
/// <inheritdoc />
public override void FromBgra32(
Configuration configuration,
ReadOnlySpan<Bgra32> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
dp.FromBgr24(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgra32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromBgra32.ToBgr24(source, dest);
}
/// <inheritdoc />
public override void ToL16(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<L16> destinationPixels)
public override void ToRgb24(
Configuration configuration,
ReadOnlySpan<Bgr24> sourcePixels,
Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgr24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgb24, byte>(destinationPixels);
PixelConverter.FromBgr24.ToRgb24(source, dest);
}
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref L16 dp = ref Unsafe.Add(ref destRef, i);
/// <inheritdoc />
public override void FromRgb24(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
dp.FromBgr24(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromRgb24.ToBgr24(source, dest);
}
/// <inheritdoc />
public override void ToLa16(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<La16> destinationPixels)
public override void ToL8(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<L8> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels);
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref La16 dp = ref Unsafe.Add(ref destRef, i);
ref L8 dp = ref Unsafe.Add(ref destRef, i);
dp.FromBgr24(sp);
}
}
/// <inheritdoc />
public override void ToLa32(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<La32> destinationPixels)
public override void ToL16(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<L16> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref La32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels);
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref La32 dp = ref Unsafe.Add(ref destRef, i);
ref L16 dp = ref Unsafe.Add(ref destRef, i);
dp.FromBgr24(sp);
}
}
/// <inheritdoc />
public override void ToRgb24(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<Rgb24> destinationPixels)
public override void ToLa16(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<La16> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels);
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Rgb24 dp = ref Unsafe.Add(ref destRef, i);
ref La16 dp = ref Unsafe.Add(ref destRef, i);
dp.FromBgr24(sp);
}
}
/// <inheritdoc />
public override void ToRgba32(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<Rgba32> destinationPixels)
public override void ToLa32(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<La32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Rgba32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ref La32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Rgba32 dp = ref Unsafe.Add(ref destRef, i);
ref La32 dp = ref Unsafe.Add(ref destRef, i);
dp.FromBgr24(sp);
}

134
src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs

@ -53,84 +53,112 @@ namespace SixLabors.ImageSharp.PixelFormats
Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale));
}
/// <inheritdoc />
public override void ToRgba32(Configuration configuration, ReadOnlySpan<Bgra32> sourcePixels, Span<Rgba32> destinationPixels)
public override void ToRgba32(
Configuration configuration,
ReadOnlySpan<Bgra32> sourcePixels,
Span<Rgba32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<Bgra32,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<Rgba32, uint>(ref MemoryMarshal.GetReference(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToRgba32(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgra32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(destinationPixels);
PixelConverter.FromBgra32.ToRgba32(source, dest);
}
/// <inheritdoc />
public override void FromRgba32(Configuration configuration, ReadOnlySpan<Rgba32> sourcePixels, Span<Bgra32> destinationPixels)
public override void FromRgba32(
Configuration configuration,
ReadOnlySpan<Rgba32> sourcePixels,
Span<Bgra32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<Rgba32,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<Bgra32, uint>(ref MemoryMarshal.GetReference(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToBgra32(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgba32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgra32, byte>(destinationPixels);
PixelConverter.FromRgba32.ToBgra32(source, dest);
}
/// <inheritdoc />
public override void ToArgb32(Configuration configuration, ReadOnlySpan<Bgra32> sourcePixels, Span<Argb32> destinationPixels)
public override void ToArgb32(
Configuration configuration,
ReadOnlySpan<Bgra32> sourcePixels,
Span<Argb32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<Bgra32,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<Argb32, uint>(ref MemoryMarshal.GetReference(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToArgb32(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgra32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Argb32, byte>(destinationPixels);
PixelConverter.FromBgra32.ToArgb32(source, dest);
}
/// <inheritdoc />
public override void FromArgb32(Configuration configuration, ReadOnlySpan<Argb32> sourcePixels, Span<Bgra32> destinationPixels)
public override void FromArgb32(
Configuration configuration,
ReadOnlySpan<Argb32> sourcePixels,
Span<Bgra32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<Argb32,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<Bgra32, uint>(ref MemoryMarshal.GetReference(destinationPixels));
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Argb32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgra32, byte>(destinationPixels);
PixelConverter.FromArgb32.ToBgra32(source, dest);
}
/// <inheritdoc />
public override void ToRgb24(
Configuration configuration,
ReadOnlySpan<Bgra32> sourcePixels,
Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToBgra32(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgra32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgb24, byte>(destinationPixels);
PixelConverter.FromBgra32.ToRgb24(source, dest);
}
/// <inheritdoc />
public override void ToBgr24(Configuration configuration, ReadOnlySpan<Bgra32> sourcePixels, Span<Bgr24> destinationPixels)
public override void FromRgb24(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Bgra32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgra32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgra32, byte>(destinationPixels);
PixelConverter.FromRgb24.ToBgra32(source, dest);
}
/// <inheritdoc />
public override void ToBgr24(
Configuration configuration,
ReadOnlySpan<Bgra32> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgra32 sp = ref Unsafe.Add(ref sourceRef, i);
ref Bgr24 dp = ref Unsafe.Add(ref destRef, i);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgra32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromBgra32.ToBgr24(source, dest);
}
dp.FromBgra32(sp);
}
/// <inheritdoc />
public override void FromBgr24(
Configuration configuration,
ReadOnlySpan<Bgr24> sourcePixels,
Span<Bgra32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgr24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgra32, byte>(destinationPixels);
PixelConverter.FromBgr24.ToBgra32(source, dest);
}
/// <inheritdoc />
@ -205,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats
}
}
/// <inheritdoc />
public override void ToRgb24(Configuration configuration, ReadOnlySpan<Bgra32> sourcePixels, Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgra32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels);
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgra32 sp = ref Unsafe.Add(ref sourceRef, i);
ref Rgb24 dp = ref Unsafe.Add(ref destRef, i);
dp.FromBgra32(sp);
}
}
/// <inheritdoc />
public override void ToRgb48(Configuration configuration, ReadOnlySpan<Bgra32> sourcePixels, Span<Rgb48> destinationPixels)
{

129
src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs

@ -52,59 +52,114 @@ namespace SixLabors.ImageSharp.PixelFormats
{
Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply));
}
/// <inheritdoc />
public override void ToArgb32(Configuration configuration, ReadOnlySpan<Rgb24> sourcePixels, Span<Argb32> destinationPixels)
public override void ToRgba32(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Rgba32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(destinationPixels);
PixelConverter.FromRgb24.ToRgba32(source, dest);
}
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Argb32 dp = ref Unsafe.Add(ref destRef, i);
/// <inheritdoc />
public override void FromRgba32(
Configuration configuration,
ReadOnlySpan<Rgba32> sourcePixels,
Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
dp.FromRgb24(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgba32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgb24, byte>(destinationPixels);
PixelConverter.FromRgba32.ToRgb24(source, dest);
}
/// <inheritdoc />
public override void ToBgr24(Configuration configuration, ReadOnlySpan<Rgb24> sourcePixels, Span<Bgr24> destinationPixels)
public override void ToArgb32(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Argb32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Argb32, byte>(destinationPixels);
PixelConverter.FromRgb24.ToArgb32(source, dest);
}
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Bgr24 dp = ref Unsafe.Add(ref destRef, i);
/// <inheritdoc />
public override void FromArgb32(
Configuration configuration,
ReadOnlySpan<Argb32> sourcePixels,
Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
dp.FromRgb24(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Argb32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgb24, byte>(destinationPixels);
PixelConverter.FromArgb32.ToRgb24(source, dest);
}
/// <inheritdoc />
public override void ToBgra32(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Bgra32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgra32, byte>(destinationPixels);
PixelConverter.FromRgb24.ToBgra32(source, dest);
}
/// <inheritdoc />
public override void ToBgra32(Configuration configuration, ReadOnlySpan<Rgb24> sourcePixels, Span<Bgra32> destinationPixels)
public override void FromBgra32(
Configuration configuration,
ReadOnlySpan<Bgra32> sourcePixels,
Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgra32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgb24, byte>(destinationPixels);
PixelConverter.FromBgra32.ToRgb24(source, dest);
}
/// <inheritdoc />
public override void ToBgr24(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Bgra32 dp = ref Unsafe.Add(ref destRef, i);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromRgb24.ToBgr24(source, dest);
}
dp.FromRgb24(sp);
}
/// <inheritdoc />
public override void FromBgr24(
Configuration configuration,
ReadOnlySpan<Bgr24> sourcePixels,
Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgr24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgb24, byte>(destinationPixels);
PixelConverter.FromBgr24.ToRgb24(source, dest);
}
/// <inheritdoc />
@ -179,24 +234,6 @@ namespace SixLabors.ImageSharp.PixelFormats
}
}
/// <inheritdoc />
public override void ToRgba32(Configuration configuration, ReadOnlySpan<Rgb24> sourcePixels, Span<Rgba32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Rgba32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Rgba32 dp = ref Unsafe.Add(ref destRef, i);
dp.FromRgb24(sp);
}
}
/// <inheritdoc />
public override void ToRgb48(Configuration configuration, ReadOnlySpan<Rgb24> sourcePixels, Span<Rgb48> destinationPixels)
{

134
src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs

@ -42,84 +42,112 @@ namespace SixLabors.ImageSharp.PixelFormats
}
/// <inheritdoc />
public override void ToArgb32(Configuration configuration, ReadOnlySpan<Rgba32> sourcePixels, Span<Argb32> destinationPixels)
public override void ToArgb32(
Configuration configuration,
ReadOnlySpan<Rgba32> sourcePixels,
Span<Argb32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<Rgba32,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<Argb32, uint>(ref MemoryMarshal.GetReference(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToArgb32(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgba32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Argb32, byte>(destinationPixels);
PixelConverter.FromRgba32.ToArgb32(source, dest);
}
/// <inheritdoc />
public override void FromArgb32(Configuration configuration, ReadOnlySpan<Argb32> sourcePixels, Span<Rgba32> destinationPixels)
public override void FromArgb32(
Configuration configuration,
ReadOnlySpan<Argb32> sourcePixels,
Span<Rgba32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<Argb32,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<Rgba32, uint>(ref MemoryMarshal.GetReference(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToRgba32(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Argb32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(destinationPixels);
PixelConverter.FromArgb32.ToRgba32(source, dest);
}
/// <inheritdoc />
public override void ToBgra32(Configuration configuration, ReadOnlySpan<Rgba32> sourcePixels, Span<Bgra32> destinationPixels)
public override void ToBgra32(
Configuration configuration,
ReadOnlySpan<Rgba32> sourcePixels,
Span<Bgra32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<Rgba32,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<Bgra32, uint>(ref MemoryMarshal.GetReference(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToBgra32(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgba32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgra32, byte>(destinationPixels);
PixelConverter.FromRgba32.ToBgra32(source, dest);
}
/// <inheritdoc />
public override void FromBgra32(Configuration configuration, ReadOnlySpan<Bgra32> sourcePixels, Span<Rgba32> destinationPixels)
public override void FromBgra32(
Configuration configuration,
ReadOnlySpan<Bgra32> sourcePixels,
Span<Rgba32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<Bgra32,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<Rgba32, uint>(ref MemoryMarshal.GetReference(destinationPixels));
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgra32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(destinationPixels);
PixelConverter.FromBgra32.ToRgba32(source, dest);
}
/// <inheritdoc />
public override void ToRgb24(
Configuration configuration,
ReadOnlySpan<Rgba32> sourcePixels,
Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToRgba32(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgba32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgb24, byte>(destinationPixels);
PixelConverter.FromRgba32.ToRgb24(source, dest);
}
/// <inheritdoc />
public override void ToBgr24(Configuration configuration, ReadOnlySpan<Rgba32> sourcePixels, Span<Bgr24> destinationPixels)
public override void FromRgb24(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Rgba32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Rgba32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(destinationPixels);
PixelConverter.FromRgb24.ToRgba32(source, dest);
}
/// <inheritdoc />
public override void ToBgr24(
Configuration configuration,
ReadOnlySpan<Rgba32> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Rgba32 sp = ref Unsafe.Add(ref sourceRef, i);
ref Bgr24 dp = ref Unsafe.Add(ref destRef, i);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgba32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromRgba32.ToBgr24(source, dest);
}
dp.FromRgba32(sp);
}
/// <inheritdoc />
public override void FromBgr24(
Configuration configuration,
ReadOnlySpan<Bgr24> sourcePixels,
Span<Rgba32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgr24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(destinationPixels);
PixelConverter.FromBgr24.ToRgba32(source, dest);
}
/// <inheritdoc />
@ -194,24 +222,6 @@ namespace SixLabors.ImageSharp.PixelFormats
}
}
/// <inheritdoc />
public override void ToRgb24(Configuration configuration, ReadOnlySpan<Rgba32> sourcePixels, Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Rgba32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels);
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Rgba32 sp = ref Unsafe.Add(ref sourceRef, i);
ref Rgb24 dp = ref Unsafe.Add(ref destRef, i);
dp.FromRgba32(sp);
}
}
/// <inheritdoc />
public override void ToRgb48(Configuration configuration, ReadOnlySpan<Rgba32> sourcePixels, Span<Rgb48> destinationPixels)
{

38
src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude

@ -17,7 +17,7 @@ using System.Runtime.InteropServices;
<#+
static readonly string[] CommonPixelTypes = { "Argb32", "Bgr24", "Bgra32", "L8", "L16", "La16", "La32", "Rgb24", "Rgba32", "Rgb48", "Rgba64", "Bgra5551" };
static readonly string[] Optimized32BitTypes = { "Rgba32", "Argb32", "Bgra32" };
static readonly string[] OptimizedPixelTypes = { "Rgba32", "Argb32", "Bgra32", "Rgb24", "Bgr24" };
// Types with Rgba32-combatible to/from Vector4 conversion
static readonly string[] Rgba32CompatibleTypes = { "Argb32", "Bgra32", "Rgb24", "Bgr24" };
@ -88,35 +88,31 @@ using System.Runtime.InteropServices;
{
#>
/// <inheritdoc />
public override void To<#=otherPixelType#>(Configuration configuration, ReadOnlySpan<<#=thisPixelType#>> sourcePixels, Span<<#=otherPixelType#>> destinationPixels)
public override void To<#=otherPixelType#>(
Configuration configuration,
ReadOnlySpan<<#=thisPixelType#>> sourcePixels,
Span<<#=otherPixelType#>> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<<#=thisPixelType#>,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<<#=otherPixelType#>, uint>(ref MemoryMarshal.GetReference(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.From<#=thisPixelType#>.To<#=otherPixelType#>(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<<#=thisPixelType#>, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<<#=otherPixelType#>, byte>(destinationPixels);
PixelConverter.From<#=thisPixelType#>.To<#=otherPixelType#>(source, dest);
}
/// <inheritdoc />
public override void From<#=otherPixelType#>(Configuration configuration, ReadOnlySpan<<#=otherPixelType#>> sourcePixels, Span<<#=thisPixelType#>> destinationPixels)
public override void From<#=otherPixelType#>(
Configuration configuration,
ReadOnlySpan<<#=otherPixelType#>> sourcePixels,
Span<<#=thisPixelType#>> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref uint sourceRef = ref Unsafe.As<<#=otherPixelType#>,uint>(ref MemoryMarshal.GetReference(sourcePixels));
ref uint destRef = ref Unsafe.As<<#=thisPixelType#>, uint>(ref MemoryMarshal.GetReference(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
uint sp = Unsafe.Add(ref sourceRef, i);
Unsafe.Add(ref destRef, i) = PixelConverter.From<#=otherPixelType#>.To<#=thisPixelType#>(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<<#=otherPixelType#>, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<<#=thisPixelType#>, byte>(destinationPixels);
PixelConverter.From<#=otherPixelType#>.To<#=thisPixelType#>(source, dest);
}
<#+
}
@ -152,8 +148,8 @@ using System.Runtime.InteropServices;
GenerateRgba32CompatibleVector4ConversionMethods(pixelType, pixelType.EndsWith("32"));
}
var matching32BitTypes = Optimized32BitTypes.Contains(pixelType) ?
Optimized32BitTypes.Where(p => p != pixelType) :
var matching32BitTypes = OptimizedPixelTypes.Contains(pixelType) ?
OptimizedPixelTypes.Where(p => p != pixelType) :
Enumerable.Empty<string>();
foreach (string destPixelType in matching32BitTypes)

222
src/ImageSharp/PixelFormats/Utils/PixelConverter.cs

@ -1,7 +1,7 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Buffers.Binary;
using System;
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.PixelFormats.Utils
@ -21,88 +21,196 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
public static class FromRgba32
{
/// <summary>
/// Converts a packed <see cref="Rgba32"/> to <see cref="Argb32"/>.
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Rgba32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Argb32"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint ToArgb32(uint packedRgba)
{
// packedRgba = [aa bb gg rr]
// ROTL(8, packedRgba) = [bb gg rr aa]
return (packedRgba << 8) | (packedRgba >> 24);
}
public static void ToArgb32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4<WXYZShuffle4>(source, dest, default);
/// <summary>
/// Converts a packed <see cref="Rgba32"/> to <see cref="Bgra32"/>.
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Rgba32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgra32"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint ToBgra32(uint packedRgba)
{
// packedRgba = [aa bb gg rr]
// tmp1 = [aa 00 gg 00]
// tmp2 = [00 bb 00 rr]
// tmp3=ROTL(16, tmp2) = [00 rr 00 bb]
// tmp1 + tmp3 = [aa rr gg bb]
uint tmp1 = packedRgba & 0xFF00FF00;
uint tmp2 = packedRgba & 0x00FF00FF;
uint tmp3 = (tmp2 << 16) | (tmp2 >> 16);
return tmp1 + tmp3;
}
public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4<ZYXWShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Rgba32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Rgb24"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgb24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3<XYZWShuffle4Slice3>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Rgba32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgr24"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgr24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2));
}
public static class FromArgb32
{
/// <summary>
/// Converts a packed <see cref="Argb32"/> to <see cref="Rgba32"/>.
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Argb32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Rgba32"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgba32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4<YZWXShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Argb32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgra32"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4<WZYXShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Argb32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Rgb24"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint ToRgba32(uint packedArgb)
{
// packedArgb = [bb gg rr aa]
// ROTR(8, packedArgb) = [aa bb gg rr]
return (packedArgb >> 8) | (packedArgb << 24);
}
public static void ToRgb24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 3, 2, 1));
/// <summary>
/// Converts a packed <see cref="Argb32"/> to <see cref="Bgra32"/>.
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Argb32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgr24"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint ToBgra32(uint packedArgb)
{
// packedArgb = [bb gg rr aa]
// REVERSE(packedArgb) = [aa rr gg bb]
return BinaryPrimitives.ReverseEndianness(packedArgb);
}
public static void ToBgr24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 1, 2, 3));
}
public static class FromBgra32
{
/// <summary>
/// Converts a packed <see cref="Bgra32"/> to <see cref="Argb32"/>.
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Bgra32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Argb32"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToArgb32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4<WZYXShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Bgra32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgra32"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgba32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4<ZYXWShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Argb32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Rgb24"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgb24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2));
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Argb32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgr24"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgr24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3<XYZWShuffle4Slice3>(source, dest, default);
}
public static class FromRgb24
{
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Rgb24"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Rgba32"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgba32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Pad3Shuffle4<XYZWPad3Shuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Rgba32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Argb32"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToArgb32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3));
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Rgba32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgra32"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2));
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Rgb24"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgr24"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgr24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2));
}
public static class FromBgr24
{
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Bgr24"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Argb32"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToArgb32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3));
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Bgr24"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgra32"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgba32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2));
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Bgr24"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgra32"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint ToArgb32(uint packedBgra)
{
// packedBgra = [aa rr gg bb]
// REVERSE(packedBgra) = [bb gg rr aa]
return BinaryPrimitives.ReverseEndianness(packedBgra);
}
public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Pad3Shuffle4<XYZWPad3Shuffle4>(source, dest, default);
/// <summary>
/// Converts a packed <see cref="Rgba32"/> to <see cref="Bgra32"/>.
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Bgr24"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Rgb24"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint ToRgba32(uint packedBgra)
{
// packedRgba = [aa rr gg bb]
// tmp1 = [aa 00 gg 00]
// tmp2 = [00 rr 00 bb]
// tmp3=ROTL(16, tmp2) = [00 bb 00 rr]
// tmp1 + tmp3 = [aa bb gg rr]
uint tmp1 = packedBgra & 0xFF00FF00;
uint tmp2 = packedBgra & 0x00FF00FF;
uint tmp3 = (tmp2 << 16) | (tmp2 >> 16);
return tmp1 + tmp3;
}
public static void ToRgb24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2));
}
}
}
}

3
src/ImageSharp/Processing/Processors/Binarization/AdaptiveThresholdProcessor{TPixel}.cs

@ -67,7 +67,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Binarization
ref TPixel color = ref Unsafe.Add(ref rowRef, x);
color.ToRgba32(ref rgb);
sum += (ulong)(rgb.R + rgb.G + rgb.G);
sum += (ulong)(rgb.R + rgb.G + rgb.B);
if (x - startX != 0)
{
intImage[x - startX, y - startY] = intImage[x - startX - 1, y - startY] + sum;

62
src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs

@ -86,42 +86,39 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
new Rectangle(0, 0, sourceWidth, tileYStartPositions.Count),
in operation);
ref TPixel pixelsBase = ref source.GetPixelReference(0, 0);
// Fix left column
ProcessBorderColumn(ref pixelsBase, cdfData, 0, sourceWidth, sourceHeight, this.Tiles, tileHeight, xStart: 0, xEnd: halfTileWidth, luminanceLevels);
ProcessBorderColumn(source, cdfData, 0, sourceHeight, this.Tiles, tileHeight, xStart: 0, xEnd: halfTileWidth, luminanceLevels);
// Fix right column
int rightBorderStartX = ((this.Tiles - 1) * tileWidth) + halfTileWidth;
ProcessBorderColumn(ref pixelsBase, cdfData, this.Tiles - 1, sourceWidth, sourceHeight, this.Tiles, tileHeight, xStart: rightBorderStartX, xEnd: sourceWidth, luminanceLevels);
ProcessBorderColumn(source, cdfData, this.Tiles - 1, sourceHeight, this.Tiles, tileHeight, xStart: rightBorderStartX, xEnd: sourceWidth, luminanceLevels);
// Fix top row
ProcessBorderRow(ref pixelsBase, cdfData, 0, sourceWidth, this.Tiles, tileWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels);
ProcessBorderRow(source, cdfData, 0, sourceWidth, this.Tiles, tileWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels);
// Fix bottom row
int bottomBorderStartY = ((this.Tiles - 1) * tileHeight) + halfTileHeight;
ProcessBorderRow(ref pixelsBase, cdfData, this.Tiles - 1, sourceWidth, this.Tiles, tileWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels);
ProcessBorderRow(source, cdfData, this.Tiles - 1, sourceWidth, this.Tiles, tileWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels);
// Left top corner
ProcessCornerTile(ref pixelsBase, cdfData, sourceWidth, 0, 0, xStart: 0, xEnd: halfTileWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels);
ProcessCornerTile(source, cdfData, 0, 0, xStart: 0, xEnd: halfTileWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels);
// Left bottom corner
ProcessCornerTile(ref pixelsBase, cdfData, sourceWidth, 0, this.Tiles - 1, xStart: 0, xEnd: halfTileWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels);
ProcessCornerTile(source, cdfData, 0, this.Tiles - 1, xStart: 0, xEnd: halfTileWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels);
// Right top corner
ProcessCornerTile(ref pixelsBase, cdfData, sourceWidth, this.Tiles - 1, 0, xStart: rightBorderStartX, xEnd: sourceWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels);
ProcessCornerTile(source, cdfData, this.Tiles - 1, 0, xStart: rightBorderStartX, xEnd: sourceWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels);
// Right bottom corner
ProcessCornerTile(ref pixelsBase, cdfData, sourceWidth, this.Tiles - 1, this.Tiles - 1, xStart: rightBorderStartX, xEnd: sourceWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels);
ProcessCornerTile(source, cdfData, this.Tiles - 1, this.Tiles - 1, xStart: rightBorderStartX, xEnd: sourceWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels);
}
}
/// <summary>
/// Processes the part of a corner tile which was previously left out. It consists of 1 / 4 of a tile and does not need interpolation.
/// </summary>
/// <param name="pixelsBase">The output pixels base reference.</param>
/// <param name="source">The source image.</param>
/// <param name="cdfData">The lookup table to remap the grey values.</param>
/// <param name="sourceWidth">The source image width.</param>
/// <param name="cdfX">The x-position in the CDF lookup map.</param>
/// <param name="cdfY">The y-position in the CDF lookup map.</param>
/// <param name="xStart">X start position.</param>
@ -133,9 +130,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
/// or 65536 for 16-bit grayscale images.
/// </param>
private static void ProcessCornerTile(
ref TPixel pixelsBase,
ImageFrame<TPixel> source,
CdfTileData cdfData,
int sourceWidth,
int cdfX,
int cdfY,
int xStart,
@ -146,10 +142,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
{
for (int dy = yStart; dy < yEnd; dy++)
{
int dyOffSet = dy * sourceWidth;
Span<TPixel> rowSpan = source.GetPixelRowSpan(dy);
for (int dx = xStart; dx < xEnd; dx++)
{
ref TPixel pixel = ref Unsafe.Add(ref pixelsBase, dyOffSet + dx);
ref TPixel pixel = ref rowSpan[dx];
float luminanceEqualized = cdfData.RemapGreyValue(cdfX, cdfY, GetLuminance(pixel, luminanceLevels));
pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W));
}
@ -159,10 +155,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
/// <summary>
/// Processes a border column of the image which is half the size of the tile width.
/// </summary>
/// <param name="pixelBase">The output pixels reference.</param>
/// <param name="source">The source image.</param>
/// <param name="cdfData">The pre-computed lookup tables to remap the grey values for each tiles.</param>
/// <param name="cdfX">The X index of the lookup table to use.</param>
/// <param name="sourceWidth">The source image width.</param>
/// <param name="sourceHeight">The source image height.</param>
/// <param name="tileCount">The number of vertical tiles.</param>
/// <param name="tileHeight">The height of a tile.</param>
@ -173,10 +168,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
/// or 65536 for 16-bit grayscale images.
/// </param>
private static void ProcessBorderColumn(
ref TPixel pixelBase,
ImageFrame<TPixel> source,
CdfTileData cdfData,
int cdfX,
int sourceWidth,
int sourceHeight,
int tileCount,
int tileHeight,
@ -194,10 +188,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
int tileY = 0;
for (int dy = y; dy < yLimit; dy++)
{
int dyOffSet = dy * sourceWidth;
Span<TPixel> rowSpan = source.GetPixelRowSpan(dy);
for (int dx = xStart; dx < xEnd; dx++)
{
ref TPixel pixel = ref Unsafe.Add(ref pixelBase, dyOffSet + dx);
ref TPixel pixel = ref rowSpan[dx];
float luminanceEqualized = InterpolateBetweenTwoTiles(pixel, cdfData, cdfX, cdfY, cdfX, cdfY + 1, tileY, tileHeight, luminanceLevels);
pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W));
}
@ -213,7 +207,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
/// <summary>
/// Processes a border row of the image which is half of the size of the tile height.
/// </summary>
/// <param name="pixelBase">The output pixels base reference.</param>
/// <param name="source">The source image.</param>
/// <param name="cdfData">The pre-computed lookup tables to remap the grey values for each tiles.</param>
/// <param name="cdfY">The Y index of the lookup table to use.</param>
/// <param name="sourceWidth">The source image width.</param>
@ -226,7 +220,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
/// or 65536 for 16-bit grayscale images.
/// </param>
private static void ProcessBorderRow(
ref TPixel pixelBase,
ImageFrame<TPixel> source,
CdfTileData cdfData,
int cdfY,
int sourceWidth,
@ -244,12 +238,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
{
for (int dy = yStart; dy < yEnd; dy++)
{
int dyOffSet = dy * sourceWidth;
Span<TPixel> rowSpan = source.GetPixelRowSpan(dy);
int tileX = 0;
int xLimit = Math.Min(x + tileWidth, sourceWidth - 1);
for (int dx = x; dx < xLimit; dx++)
{
ref TPixel pixel = ref Unsafe.Add(ref pixelBase, dyOffSet + dx);
ref TPixel pixel = ref rowSpan[dx];
float luminanceEqualized = InterpolateBetweenTwoTiles(pixel, cdfData, cdfX, cdfY, cdfX + 1, cdfY, tileX, tileWidth, luminanceLevels);
pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W));
tileX++;
@ -410,8 +404,6 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
[MethodImpl(InliningOptions.ShortMethod)]
public void Invoke(in RowInterval rows)
{
ref TPixel sourceBase = ref this.source.GetPixelReference(0, 0);
for (int index = rows.Min; index < rows.Max; index++)
{
(int y, int cdfY) tileYStartPosition = this.tileYStartPositions[index];
@ -427,11 +419,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
int xEnd = Math.Min(x + this.tileWidth, this.sourceWidth);
for (int dy = y; dy < yEnd; dy++)
{
int dyOffSet = dy * this.sourceWidth;
Span<TPixel> rowSpan = this.source.GetPixelRowSpan(dy);
int tileX = 0;
for (int dx = x; dx < xEnd; dx++)
{
ref TPixel pixel = ref Unsafe.Add(ref sourceBase, dyOffSet + dx);
ref TPixel pixel = ref rowSpan[dx];
float luminanceEqualized = InterpolateBetweenFourTiles(
pixel,
this.cdfData,
@ -597,15 +589,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
[MethodImpl(InliningOptions.ShortMethod)]
public void Invoke(in RowInterval rows)
{
ref TPixel sourceBase = ref this.source.GetPixelReference(0, 0);
for (int index = rows.Min; index < rows.Max; index++)
{
int cdfX = 0;
int cdfY = this.tileYStartPositions[index].cdfY;
int y = this.tileYStartPositions[index].y;
int endY = Math.Min(y + this.tileHeight, this.sourceHeight);
ref int cdfMinBase = ref MemoryMarshal.GetReference(this.cdfMinBuffer2D.GetRowSpan(cdfY));
Span<int> cdfMinSpan = this.cdfMinBuffer2D.GetRowSpan(cdfY);
using IMemoryOwner<int> histogramBuffer = this.allocator.Allocate<int>(this.luminanceLevels);
Span<int> histogram = histogramBuffer.GetSpan();
@ -620,10 +610,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
int xlimit = Math.Min(x + this.tileWidth, this.sourceWidth);
for (int dy = y; dy < endY; dy++)
{
int dyOffset = dy * this.sourceWidth;
Span<TPixel> rowSpan = this.source.GetPixelRowSpan(dy);
for (int dx = x; dx < xlimit; dx++)
{
int luminance = GetLuminance(Unsafe.Add(ref sourceBase, dyOffset + dx), this.luminanceLevels);
int luminance = GetLuminance(rowSpan[dx], this.luminanceLevels);
histogram[luminance]++;
}
}
@ -633,7 +623,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
this.processor.ClipHistogram(histogram, this.processor.ClipLimit);
}
Unsafe.Add(ref cdfMinBase, cdfX) = this.processor.CalculateCdf(ref cdfBase, ref histogramBase, histogram.Length - 1);
cdfMinSpan[cdfX] += this.processor.CalculateCdf(ref cdfBase, ref histogramBase, histogram.Length - 1);
cdfX++;
}

36
src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs

@ -6,6 +6,7 @@ using System.Buffers;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
@ -51,7 +52,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
using IMemoryOwner<int> histogramBuffer = memoryAllocator.Allocate<int>(this.LuminanceLevels, AllocationOptions.Clean);
// Build the histogram of the grayscale levels
// Build the histogram of the grayscale levels.
var grayscaleOperation = new GrayscaleLevelsRowOperation(interest, histogramBuffer, source, this.LuminanceLevels);
ParallelRowIterator.IterateRows(
this.Configuration,
@ -106,16 +107,24 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
}
/// <inheritdoc/>
#if NETSTANDARD2_0
// https://github.com/SixLabors/ImageSharp/issues/1204
[MethodImpl(MethodImplOptions.NoOptimization)]
#else
[MethodImpl(InliningOptions.ShortMethod)]
#endif
public void Invoke(int y)
{
ref int histogramBase = ref MemoryMarshal.GetReference(this.histogramBuffer.GetSpan());
ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y));
Span<TPixel> pixelRow = this.source.GetPixelRowSpan(y);
int levels = this.luminanceLevels;
for (int x = 0; x < this.bounds.Width; x++)
{
int luminance = GetLuminance(Unsafe.Add(ref pixelBase, x), this.luminanceLevels);
Unsafe.Add(ref histogramBase, luminance)++;
// TODO: We should bulk convert here.
var vector = pixelRow[x].ToVector4();
int luminance = ImageMaths.GetBT709Luminance(ref vector, levels);
Interlocked.Increment(ref Unsafe.Add(ref histogramBase, luminance));
}
}
}
@ -147,18 +156,27 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization
}
/// <inheritdoc/>
#if NETSTANDARD2_0
// https://github.com/SixLabors/ImageSharp/issues/1204
[MethodImpl(MethodImplOptions.NoOptimization)]
#else
[MethodImpl(InliningOptions.ShortMethod)]
#endif
public void Invoke(int y)
{
ref int cdfBase = ref MemoryMarshal.GetReference(this.cdfBuffer.GetSpan());
ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y));
Span<TPixel> pixelRow = this.source.GetPixelRowSpan(y);
int levels = this.luminanceLevels;
float noOfPixelsMinusCdfMin = this.numberOfPixelsMinusCdfMin;
for (int x = 0; x < this.bounds.Width; x++)
{
ref TPixel pixel = ref Unsafe.Add(ref pixelBase, x);
int luminance = GetLuminance(pixel, this.luminanceLevels);
float luminanceEqualized = Unsafe.Add(ref cdfBase, luminance) / this.numberOfPixelsMinusCdfMin;
pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W));
// TODO: We should bulk convert here.
ref TPixel pixel = ref pixelRow[x];
var vector = pixel.ToVector4();
int luminance = ImageMaths.GetBT709Luminance(ref vector, levels);
float luminanceEqualized = Unsafe.Add(ref cdfBase, luminance) / noOfPixelsMinusCdfMin;
pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, vector.W));
}
}
}

19
tests/Directory.Build.targets

@ -26,18 +26,21 @@
<ItemGroup>
<!--Test Dependencies-->
<PackageReference Update="BenchmarkDotNet" Version="0.12.1" />
<PackageReference Update="BenchmarkDotNet.Diagnostics.Windows" Version="0.12.1" Condition="'$(OS)' == 'Windows_NT'" />
<PackageReference Update="BenchmarkDotNet.Diagnostics.Windows" Version="0.12.1" Condition="'$(OS)' == 'Windows_NT'" />
<PackageReference Update="Colourful" Version="2.0.5" />
<PackageReference Update="coverlet.collector" Version="1.3.0" PrivateAssets="All"/>
<PackageReference Update="Magick.NET-Q16-AnyCPU" Version="7.15.5" />
<PackageReference Update="Microsoft.DotNet.RemoteExecutor" Version="5.0.0-beta.20069.1" />
<PackageReference Update="Microsoft.NET.Test.Sdk" Version="16.5.0-preview-20200116-01" />
<PackageReference Update="Moq" Version="4.14.5" />
<PackageReference Update="coverlet.collector" Version="1.3.1-preview.27.gdd2237a3be" PrivateAssets="All"/>
<PackageReference Update="Magick.NET-Q16-AnyCPU" Version="7.22.0" />
<PackageReference Update="Microsoft.DotNet.RemoteExecutor" Version="6.0.0-beta.20513.1" />
<PackageReference Update="Microsoft.DotNet.XUnitExtensions" Version="6.0.0-beta.20513.1" />
<PackageReference Update="Microsoft.NET.Test.Sdk" Version="16.7.1" />
<PackageReference Update="Moq" Version="4.14.6" />
<PackageReference Update="Pfim" Version="0.9.1" />
<PackageReference Update="SharpZipLib" Version="1.2.0" />
<!-- Enable support for using libgdiplus on macOS -->
<PackageReference Include="runtime.osx.10.10-x64.CoreCompat.System.Drawing" Version="5.8.64" Condition="$([MSBuild]::IsOSPlatform('OSX'))" />
<PackageReference Update="SharpZipLib" Version="1.3.0" />
<PackageReference Update="System.Drawing.Common" Version="4.7.0" />
<PackageReference Update="xunit" Version="2.4.1" />
<PackageReference Update="xunit.runner.visualstudio" Version="2.4.1" />
<PackageReference Update="xunit.runner.visualstudio" Version="2.4.3" />
</ItemGroup>
</Project>

21
tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs

@ -0,0 +1,21 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
{
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Block8x8F_AddInPlace
{
[Benchmark]
public float AddInplace()
{
float f = 42F;
Block8x8F b = default;
b.AddInPlace(f);
return f;
}
}
}

37
tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs

@ -0,0 +1,37 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
{
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Block8x8F_MultiplyInPlaceBlock
{
private static readonly Block8x8F Source = Create8x8FloatData();
[Benchmark]
public void MultiplyInPlaceBlock()
{
Block8x8F dest = default;
Source.MultiplyInPlace(ref dest);
}
private static Block8x8F Create8x8FloatData()
{
var result = new float[64];
for (int i = 0; i < 8; i++)
{
for (int j = 0; j < 8; j++)
{
result[(i * 8) + j] = (i * 10) + j;
}
}
var source = default(Block8x8F);
source.LoadFrom(result);
return source;
}
}
}

21
tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs

@ -0,0 +1,21 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
{
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Block8x8F_MultiplyInPlaceScalar
{
[Benchmark]
public float MultiplyInPlaceScalar()
{
float f = 42F;
Block8x8F b = default;
b.MultiplyInPlace(f);
return f;
}
}
}

37
tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs

@ -0,0 +1,37 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
{
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Block8x8F_Transpose
{
private static readonly Block8x8F Source = Create8x8FloatData();
[Benchmark]
public void TransposeInto()
{
var dest = default(Block8x8F);
Source.TransposeInto(ref dest);
}
private static Block8x8F Create8x8FloatData()
{
var result = new float[64];
for (int i = 0; i < 8; i++)
{
for (int j = 0; j < 8; j++)
{
result[(i * 8) + j] = (i * 10) + j;
}
}
var source = default(Block8x8F);
source.LoadFrom(result);
return source;
}
}
}

41
tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs

@ -0,0 +1,41 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
[Config(typeof(Config.ShortClr))]
public class CmykColorConversion : ColorConversionBenchmark
{
public CmykColorConversion()
: base(4)
{
}
[Benchmark(Baseline = true)]
public void Scalar()
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
new JpegColorConverter.FromCmykBasic(8).ConvertToRgba(values, this.output);
}
[Benchmark]
public void SimdVector8()
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
new JpegColorConverter.FromCmykVector8(8).ConvertToRgba(values, this.output);
}
[Benchmark]
public void SimdVectorAvx2()
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
new JpegColorConverter.FromCmykAvx2(8).ConvertToRgba(values, this.output);
}
}
}

64
tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs

@ -0,0 +1,64 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
public abstract class ColorConversionBenchmark
{
private readonly int componentCount;
protected Buffer2D<float>[] input;
protected Vector4[] output;
protected ColorConversionBenchmark(int componentCount)
{
this.componentCount = componentCount;
}
public const int Count = 128;
[GlobalSetup]
public void Setup()
{
this.input = CreateRandomValues(this.componentCount, Count);
this.output = new Vector4[Count];
}
[GlobalCleanup]
public void Cleanup()
{
foreach (Buffer2D<float> buffer in this.input)
{
buffer.Dispose();
}
}
private static Buffer2D<float>[] CreateRandomValues(
int componentCount,
int inputBufferLength,
float minVal = 0f,
float maxVal = 255f)
{
var rnd = new Random(42);
var buffers = new Buffer2D<float>[componentCount];
for (int i = 0; i < componentCount; i++)
{
var values = new float[inputBufferLength];
for (int j = 0; j < inputBufferLength; j++)
{
values[j] = ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal;
}
// no need to dispose when buffer is not array owner
buffers[i] = Configuration.Default.MemoryAllocator.Allocate2D<float>(values.Length, 1);
}
return buffers;
}
}
}

33
tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs

@ -0,0 +1,33 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
[Config(typeof(Config.ShortClr))]
public class GrayscaleColorConversion : ColorConversionBenchmark
{
public GrayscaleColorConversion()
: base(1)
{
}
[Benchmark(Baseline = true)]
public void Scalar()
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
new JpegColorConverter.FromGrayscaleBasic(8).ConvertToRgba(values, this.output);
}
[Benchmark]
public void SimdVectorAvx2()
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
new JpegColorConverter.FromGrayscaleAvx2(8).ConvertToRgba(values, this.output);
}
}
}

41
tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs

@ -0,0 +1,41 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
[Config(typeof(Config.ShortClr))]
public class RgbColorConversion : ColorConversionBenchmark
{
public RgbColorConversion()
: base(3)
{
}
[Benchmark(Baseline = true)]
public void Scalar()
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
new JpegColorConverter.FromRgbBasic(8).ConvertToRgba(values, this.output);
}
[Benchmark]
public void SimdVector8()
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
new JpegColorConverter.FromRgbVector8(8).ConvertToRgba(values, this.output);
}
[Benchmark]
public void SimdVectorAvx2()
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
new JpegColorConverter.FromRgbAvx2(8).ConvertToRgba(values, this.output);
}
}
}

59
tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs

@ -1,39 +1,18 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters;
using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
[Config(typeof(Config.ShortClr))]
public class YCbCrColorConversion
public class YCbCrColorConversion : ColorConversionBenchmark
{
private Buffer2D<float>[] input;
private Vector4[] output;
public const int Count = 128;
[GlobalSetup]
public void Setup()
public YCbCrColorConversion()
: base(3)
{
this.input = CreateRandomValues(3, Count);
this.output = new Vector4[Count];
}
[GlobalCleanup]
public void Cleanup()
{
foreach (Buffer2D<float> buffer in this.input)
{
buffer.Dispose();
}
}
[Benchmark]
@ -41,15 +20,15 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
JpegColorConverter.FromYCbCrBasic.ConvertCore(values, this.output, 255F, 128F);
new JpegColorConverter.FromYCbCrBasic(8).ConvertToRgba(values, this.output);
}
[Benchmark(Baseline = true)]
public void SimdVector4()
public void SimdVector()
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
JpegColorConverter.FromYCbCrSimd.ConvertCore(values, this.output, 255F, 128F);
new JpegColorConverter.FromYCbCrVector4(8).ConvertToRgba(values, this.output);
}
[Benchmark]
@ -57,31 +36,15 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
JpegColorConverter.FromYCbCrSimdVector8.ConvertCore(values, this.output, 255F, 128F);
new JpegColorConverter.FromYCbCrVector8(8).ConvertToRgba(values, this.output);
}
private static Buffer2D<float>[] CreateRandomValues(
int componentCount,
int inputBufferLength,
float minVal = 0f,
float maxVal = 255f)
[Benchmark]
public void SimdVectorAvx2()
{
var rnd = new Random(42);
var buffers = new Buffer2D<float>[componentCount];
for (int i = 0; i < componentCount; i++)
{
var values = new float[inputBufferLength];
for (int j = 0; j < inputBufferLength; j++)
{
values[j] = ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal;
}
// no need to dispose when buffer is not array owner
buffers[i] = Configuration.Default.MemoryAllocator.Allocate2D<float>(values.Length, 1);
}
var values = new JpegColorConverter.ComponentValues(this.input, 0);
return buffers;
new JpegColorConverter.FromYCbCrAvx2(8).ConvertToRgba(values, this.output);
}
}
}

41
tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs

@ -0,0 +1,41 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
[Config(typeof(Config.ShortClr))]
public class YccKColorConverter : ColorConversionBenchmark
{
public YccKColorConverter()
: base(4)
{
}
[Benchmark(Baseline = true)]
public void Scalar()
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
new JpegColorConverter.FromYccKBasic(8).ConvertToRgba(values, this.output);
}
[Benchmark]
public void SimdVector8()
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
new JpegColorConverter.FromYccKVector8(8).ConvertToRgba(values, this.output);
}
[Benchmark]
public void SimdVectorAvx2()
{
var values = new JpegColorConverter.ComponentValues(this.input, 0);
new JpegColorConverter.FromYccKAvx2(8).ConvertToRgba(values, this.output);
}
}
}

14
tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs

@ -13,15 +13,13 @@ using System.Runtime.Intrinsics.X86;
#endif
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Environments;
using BenchmarkDotNet.Jobs;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
// ReSharper disable InconsistentNaming
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
[Config(typeof(Config.ShortClr))]
[Config(typeof(Config.ShortCore31))]
public abstract class FromVector4<TPixel>
where TPixel : unmanaged, IPixel<TPixel>
{
@ -32,7 +30,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
protected Configuration Configuration => Configuration.Default;
// [Params(64, 2048)]
[Params(1024)]
[Params(64, 256, 2048)]
public int Count { get; set; }
[GlobalSetup]
@ -60,7 +58,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
}
}
[Benchmark]
[Benchmark(Baseline = true)]
public void PixelOperations_Base()
{
new PixelOperations<TPixel>().FromVector4Destructive(this.Configuration, this.source.GetSpan(), this.destination.GetSpan());
@ -93,7 +91,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
SimdUtils.BasicIntrinsics256.NormalizedFloatToByteSaturate(sBytes, dFloats);
}
[Benchmark(Baseline = true)]
[Benchmark]
public void ExtendedIntrinsic()
{
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
@ -104,12 +102,12 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
#if SUPPORTS_RUNTIME_INTRINSICS
[Benchmark]
public void UseAvx2()
public void UseHwIntrinsics()
{
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
SimdUtils.Avx2Intrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats);
SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats);
}
private static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 };

55
tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs

@ -0,0 +1,55 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
[Config(typeof(Config.ShortClr))]
public class FromVector4_Rgb24 : FromVector4<Rgb24>
{
}
}
// 2020-11-02
// ##########
//
// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT
// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT
// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// IterationCount=3 LaunchCount=1 WarmupCount=3
//
// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:|
// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 64 | 343.2 ns | 305.91 ns | 16.77 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 64 | 320.8 ns | 19.93 ns | 1.09 ns | 0.94 | 0.05 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 64 | 234.3 ns | 17.98 ns | 0.99 ns | 1.00 | 0.00 | 0.0052 | - | - | 24 B |
// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 64 | 246.0 ns | 82.34 ns | 4.51 ns | 1.05 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 64 | 222.3 ns | 39.46 ns | 2.16 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 64 | 243.4 ns | 33.58 ns | 1.84 ns | 1.09 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 256 | 824.9 ns | 32.77 ns | 1.80 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 256 | 967.0 ns | 39.09 ns | 2.14 ns | 1.17 | 0.01 | 0.0172 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 256 | 756.9 ns | 94.43 ns | 5.18 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B |
// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 256 | 1,003.3 ns | 3,192.09 ns | 174.97 ns | 1.32 | 0.22 | 0.0172 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 256 | 748.6 ns | 248.03 ns | 13.60 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 256 | 437.0 ns | 36.48 ns | 2.00 ns | 0.58 | 0.01 | 0.0172 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 2048 | 5,751.6 ns | 704.24 ns | 38.60 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 2048 | 4,391.6 ns | 718.17 ns | 39.37 ns | 0.76 | 0.00 | 0.0153 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 2048 | 6,202.0 ns | 1,815.18 ns | 99.50 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 2048 | 4,225.6 ns | 1,004.03 ns | 55.03 ns | 0.68 | 0.01 | 0.0153 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 2048 | 6,157.1 ns | 2,516.98 ns | 137.96 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 2048 | 1,822.7 ns | 1,764.43 ns | 96.71 ns | 0.30 | 0.02 | 0.0172 | - | - | 72 B |

87
tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs

@ -0,0 +1,87 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Pad3Shuffle4Channel
{
private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4(1, 0, 3, 2);
private static readonly XYZWPad3Shuffle4 ControlFast = default;
private byte[] source;
private byte[] destination;
[GlobalSetup]
public void Setup()
{
this.source = new byte[this.Count];
new Random(this.Count).NextBytes(this.source);
this.destination = new byte[this.Count * 4 / 3];
}
[Params(96, 384, 768, 1536)]
public int Count { get; set; }
[Benchmark]
public void Pad3Shuffle4()
{
SimdUtils.Pad3Shuffle4(this.source, this.destination, Control);
}
[Benchmark]
public void Pad3Shuffle4FastFallback()
{
SimdUtils.Pad3Shuffle4(this.source, this.destination, ControlFast);
}
}
// 2020-10-30
// ##########
//
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// Runtime=.NET Core 3.1
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |------------------------- |------------------- |-------------------------------------------------- |------ |------------:|----------:|----------:|------------:|------:|--------:|------:|------:|------:|----------:|
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 120.64 ns | 7.190 ns | 21.200 ns | 114.26 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.63 ns | 0.175 ns | 0.155 ns | 23.65 ns | 0.15 | 0.01 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 25.25 ns | 0.356 ns | 0.298 ns | 25.27 ns | 0.17 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 14.80 ns | 0.358 ns | 1.032 ns | 14.64 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 96 | 24.84 ns | 0.376 ns | 0.333 ns | 24.74 ns | 1.57 | 0.06 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 96 | 24.58 ns | 0.471 ns | 0.704 ns | 24.38 ns | 1.60 | 0.09 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 258.92 ns | 4.873 ns | 4.069 ns | 257.95 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 384 | 41.41 ns | 0.859 ns | 1.204 ns | 41.33 ns | 0.16 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 40.74 ns | 0.848 ns | 0.793 ns | 40.48 ns | 0.16 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 74.50 ns | 0.490 ns | 0.383 ns | 74.49 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 384 | 40.74 ns | 0.624 ns | 0.584 ns | 40.72 ns | 0.55 | 0.01 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 384 | 38.28 ns | 0.534 ns | 0.417 ns | 38.22 ns | 0.51 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 503.91 ns | 6.466 ns | 6.048 ns | 501.58 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 768 | 62.86 ns | 0.332 ns | 0.277 ns | 62.80 ns | 0.12 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 64.59 ns | 0.469 ns | 0.415 ns | 64.62 ns | 0.13 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 110.51 ns | 0.592 ns | 0.554 ns | 110.33 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 768 | 64.72 ns | 1.306 ns | 1.090 ns | 64.51 ns | 0.59 | 0.01 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.11 ns | 0.816 ns | 0.682 ns | 61.98 ns | 0.56 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 1,005.84 ns | 13.176 ns | 12.325 ns | 1,004.70 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 110.05 ns | 0.256 ns | 0.214 ns | 110.04 ns | 0.11 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.545 ns | 0.483 ns | 110.09 ns | 0.11 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - |
}

68
tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs

@ -0,0 +1,68 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
[Config(typeof(Config.ShortCore31))]
public class PremultiplyVector4
{
private static readonly Vector4[] Vectors = CreateVectors();
[Benchmark(Baseline = true)]
public void PremultiplyBaseline()
{
ref Vector4 baseRef = ref MemoryMarshal.GetReference<Vector4>(Vectors);
for (int i = 0; i < Vectors.Length; i++)
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
Premultiply(ref v);
}
}
[Benchmark]
public void Premultiply()
{
Vector4Utilities.Premultiply(Vectors);
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void Premultiply(ref Vector4 source)
{
float w = source.W;
source *= w;
source.W = w;
}
private static Vector4[] CreateVectors()
{
var rnd = new Random(42);
return GenerateRandomVectorArray(rnd, 2048, 0, 1);
}
private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal)
{
var values = new Vector4[length];
for (int i = 0; i < length; i++)
{
ref Vector4 v = ref values[i];
v.X = GetRandomFloat(rnd, minVal, maxVal);
v.Y = GetRandomFloat(rnd, minVal, maxVal);
v.Z = GetRandomFloat(rnd, minVal, maxVal);
v.W = GetRandomFloat(rnd, minVal, maxVal);
}
return values;
}
private static float GetRandomFloat(Random rnd, float minVal, float maxVal)
=> ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal;
}
}

64
tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs

@ -0,0 +1,64 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Shuffle3Channel
{
private static readonly DefaultShuffle3 Control = new DefaultShuffle3(1, 0, 2);
private byte[] source;
private byte[] destination;
[GlobalSetup]
public void Setup()
{
this.source = new byte[this.Count];
new Random(this.Count).NextBytes(this.source);
this.destination = new byte[this.Count];
}
[Params(96, 384, 768, 1536)]
public int Count { get; set; }
[Benchmark]
public void Shuffle3()
{
SimdUtils.Shuffle3(this.source, this.destination, Control);
}
}
// 2020-11-02
// ##########
//
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// Runtime=.NET Core 3.1
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |--------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:|
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 48.46 ns | 1.034 ns | 2.438 ns | 47.46 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle3 | 2. AVX | Empty | 96 | 32.42 ns | 0.537 ns | 0.476 ns | 32.34 ns | 0.66 | 0.04 | - | - | - | - |
// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 32.51 ns | 0.373 ns | 0.349 ns | 32.56 ns | 0.66 | 0.03 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 199.04 ns | 1.512 ns | 1.180 ns | 199.17 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle3 | 2. AVX | Empty | 384 | 71.20 ns | 2.654 ns | 7.784 ns | 69.60 ns | 0.41 | 0.02 | - | - | - | - |
// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 63.23 ns | 0.569 ns | 0.505 ns | 63.21 ns | 0.32 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 391.28 ns | 5.087 ns | 3.972 ns | 391.22 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle3 | 2. AVX | Empty | 768 | 109.12 ns | 2.149 ns | 2.010 ns | 108.66 ns | 0.28 | 0.01 | - | - | - | - |
// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 106.51 ns | 0.734 ns | 0.613 ns | 106.56 ns | 0.27 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle3 | 2. AVX | Empty | 1536 | 190.41 ns | 1.090 ns | 0.851 ns | 190.38 ns | 0.25 | 0.00 | - | - | - | - |
// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - |
}

95
tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs

@ -0,0 +1,95 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Shuffle4Slice3Channel
{
private static readonly DefaultShuffle4Slice3 Control = new DefaultShuffle4Slice3(1, 0, 3, 2);
private static readonly XYZWShuffle4Slice3 ControlFast = default;
private byte[] source;
private byte[] destination;
[GlobalSetup]
public void Setup()
{
this.source = new byte[this.Count];
new Random(this.Count).NextBytes(this.source);
this.destination = new byte[(int)(this.Count * (3 / 4F))];
}
[Params(128, 256, 512, 1024, 2048)]
public int Count { get; set; }
[Benchmark]
public void Shuffle4Slice3()
{
SimdUtils.Shuffle4Slice3(this.source, this.destination, Control);
}
[Benchmark]
public void Shuffle4Slice3FastFallback()
{
SimdUtils.Shuffle4Slice3(this.source, this.destination, ControlFast);
}
}
// 2020-10-29
// ##########
//
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// Runtime=.NET Core 3.1
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:|
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 56.44 ns | 2.843 ns | 8.382 ns | 56.70 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 128 | 27.15 ns | 0.556 ns | 0.762 ns | 27.34 ns | 0.41 | 0.03 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.36 ns | 0.321 ns | 0.268 ns | 26.26 ns | 0.38 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 25.85 ns | 0.494 ns | 0.462 ns | 25.84 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 128 | 26.15 ns | 0.113 ns | 0.106 ns | 26.16 ns | 1.01 | 0.02 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 128 | 25.57 ns | 0.078 ns | 0.061 ns | 25.56 ns | 0.99 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 97.47 ns | 0.327 ns | 0.289 ns | 97.35 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 256 | 32.61 ns | 0.107 ns | 0.095 ns | 32.62 ns | 0.33 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.21 ns | 0.169 ns | 0.150 ns | 33.15 ns | 0.34 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 52.34 ns | 0.779 ns | 0.729 ns | 51.94 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 256 | 32.16 ns | 0.111 ns | 0.104 ns | 32.16 ns | 0.61 | 0.01 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.61 ns | 0.342 ns | 0.319 ns | 33.62 ns | 0.64 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 210.74 ns | 3.825 ns | 5.956 ns | 207.70 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 512 | 51.03 ns | 0.535 ns | 0.501 ns | 51.18 ns | 0.24 | 0.01 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 66.60 ns | 1.313 ns | 1.613 ns | 65.93 ns | 0.31 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 119.12 ns | 1.905 ns | 1.689 ns | 118.52 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 512 | 50.33 ns | 0.382 ns | 0.339 ns | 50.41 ns | 0.42 | 0.01 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 512 | 49.25 ns | 0.555 ns | 0.492 ns | 49.26 ns | 0.41 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 423.55 ns | 4.891 ns | 4.336 ns | 423.27 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 1024 | 77.13 ns | 1.355 ns | 2.264 ns | 76.19 ns | 0.19 | 0.01 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 79.39 ns | 0.103 ns | 0.086 ns | 79.37 ns | 0.19 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 226.57 ns | 2.930 ns | 2.598 ns | 226.10 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 1024 | 80.25 ns | 1.647 ns | 2.082 ns | 80.98 ns | 0.35 | 0.01 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 84.99 ns | 1.234 ns | 1.155 ns | 85.60 ns | 0.38 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 794.96 ns | 1.735 ns | 1.538 ns | 795.15 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 128.41 ns | 0.417 ns | 0.390 ns | 128.24 ns | 0.16 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 127.24 ns | 0.294 ns | 0.229 ns | 127.23 ns | 0.16 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 2048 | 126.93 ns | 0.382 ns | 0.339 ns | 126.94 ns | 0.33 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - |
}

67
tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs

@ -0,0 +1,67 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class ShuffleByte4Channel
{
private byte[] source;
private byte[] destination;
[GlobalSetup]
public void Setup()
{
this.source = new byte[this.Count];
new Random(this.Count).NextBytes(this.source);
this.destination = new byte[this.Count];
}
[Params(128, 256, 512, 1024, 2048)]
public int Count { get; set; }
[Benchmark]
public void Shuffle4Channel()
{
SimdUtils.Shuffle4<WXYZShuffle4>(this.source, this.destination, default);
}
}
// 2020-10-29
// ##########
//
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// Runtime=.NET Core 3.1
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - |
}

68
tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs

@ -0,0 +1,68 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Tests;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class ShuffleFloat4Channel
{
private static readonly byte Control = default(WXYZShuffle4).Control;
private float[] source;
private float[] destination;
[GlobalSetup]
public void Setup()
{
this.source = new Random(this.Count).GenerateRandomFloatArray(this.Count, 0, 256);
this.destination = new float[this.Count];
}
[Params(128, 256, 512, 1024, 2048)]
public int Count { get; set; }
[Benchmark]
public void Shuffle4Channel()
{
SimdUtils.Shuffle4(this.source, this.destination, Control);
}
}
// 2020-10-29
// ##########
//
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// Runtime=.NET Core 3.1
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.647 ns | 0.5475 ns | 0.4853 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 128 | 9.818 ns | 0.1457 ns | 0.1292 ns | 0.15 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 15.267 ns | 0.1005 ns | 0.0940 ns | 0.24 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 125.586 ns | 1.9312 ns | 1.8064 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 256 | 15.878 ns | 0.1983 ns | 0.1758 ns | 0.13 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 29.170 ns | 0.2925 ns | 0.2442 ns | 0.23 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 263.859 ns | 2.6660 ns | 2.3634 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 512 | 29.452 ns | 0.3334 ns | 0.3118 ns | 0.11 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 52.912 ns | 0.1932 ns | 0.1713 ns | 0.20 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 495.717 ns | 1.9850 ns | 1.8567 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 1024 | 53.757 ns | 0.3212 ns | 0.2847 ns | 0.11 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 107.815 ns | 1.6201 ns | 1.3528 ns | 0.22 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 2048 | 105.120 ns | 0.6140 ns | 0.5443 ns | 0.11 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - |
}

65
tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs

@ -0,0 +1,65 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
[Config(typeof(Config.ShortClr))]
public class ToVector4_Rgb24 : ToVector4<Rgb24>
{
[Benchmark(Baseline = true)]
public void PixelOperations_Base()
{
new PixelOperations<Rgb24>().ToVector4(
this.Configuration,
this.source.GetSpan(),
this.destination.GetSpan());
}
}
}
// 2020-11-02
// ##########
//
// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT
// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT
// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// IterationCount=3 LaunchCount=1 WarmupCount=3
//
// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:|
// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 64 | 298.4 ns | 33.63 ns | 1.84 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 64 | 355.5 ns | 908.51 ns | 49.80 ns | 1.19 | 0.17 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 64 | 220.1 ns | 13.77 ns | 0.75 ns | 1.00 | 0.00 | 0.0055 | - | - | 24 B |
// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 64 | 228.5 ns | 41.41 ns | 2.27 ns | 1.04 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 64 | 213.6 ns | 12.47 ns | 0.68 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 64 | 217.0 ns | 9.95 ns | 0.55 ns | 1.02 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 256 | 829.0 ns | 242.93 ns | 13.32 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 256 | 448.9 ns | 4.04 ns | 0.22 ns | 0.54 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 256 | 863.0 ns | 1,253.26 ns | 68.70 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B |
// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 256 | 309.2 ns | 66.16 ns | 3.63 ns | 0.36 | 0.03 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 256 | 737.0 ns | 253.90 ns | 13.92 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 256 | 212.3 ns | 1.07 ns | 0.06 ns | 0.29 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 2048 | 5,625.6 ns | 404.35 ns | 22.16 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 2048 | 1,974.1 ns | 229.84 ns | 12.60 ns | 0.35 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 2048 | 5,467.2 ns | 537.29 ns | 29.45 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 2048 | 1,985.5 ns | 4,714.23 ns | 258.40 ns | 0.36 | 0.05 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 2048 | 5,888.2 ns | 1,622.23 ns | 88.92 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 2048 | 1,165.0 ns | 191.71 ns | 10.51 ns | 0.20 | 0.00 | - | - | - | - |

13
tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs

@ -13,7 +13,7 @@ using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
[Config(typeof(Config.ShortClr))]
[Config(typeof(Config.ShortCore31))]
public class ToVector4_Rgba32 : ToVector4<Rgba32>
{
[Benchmark]
@ -52,6 +52,17 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(sBytes, dFloats);
}
#if SUPPORTS_RUNTIME_INTRINSICS
[Benchmark]
public void HwIntrinsics()
{
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
SimdUtils.HwIntrinsics.ByteToNormalizedFloat(sBytes, dFloats);
}
#endif
// [Benchmark]
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_2Loops()
{

68
tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs

@ -0,0 +1,68 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
[Config(typeof(Config.ShortCore31))]
public class UnPremultiplyVector4
{
private static readonly Vector4[] Vectors = CreateVectors();
[Benchmark(Baseline = true)]
public void UnPremultiplyBaseline()
{
ref Vector4 baseRef = ref MemoryMarshal.GetReference<Vector4>(Vectors);
for (int i = 0; i < Vectors.Length; i++)
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
UnPremultiply(ref v);
}
}
[Benchmark]
public void UnPremultiply()
{
Vector4Utilities.UnPremultiply(Vectors);
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void UnPremultiply(ref Vector4 source)
{
float w = source.W;
source /= w;
source.W = w;
}
private static Vector4[] CreateVectors()
{
var rnd = new Random(42);
return GenerateRandomVectorArray(rnd, 2048, 0, 1);
}
private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal)
{
var values = new Vector4[length];
for (int i = 0; i < length; i++)
{
ref Vector4 v = ref values[i];
v.X = GetRandomFloat(rnd, minVal, maxVal);
v.Y = GetRandomFloat(rnd, minVal, maxVal);
v.Z = GetRandomFloat(rnd, minVal, maxVal);
v.W = GetRandomFloat(rnd, minVal, maxVal);
}
return values;
}
private static float GetRandomFloat(Random rnd, float minVal, float maxVal)
=> ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal;
}
}

84
tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs

@ -0,0 +1,84 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics.X86;
#endif
using BenchmarkDotNet.Environments;
using BenchmarkDotNet.Jobs;
namespace SixLabors.ImageSharp.Benchmarks
{
public partial class Config
{
private const string On = "1";
private const string Off = "0";
// See https://github.com/SixLabors/ImageSharp/pull/1229#discussion_r440477861
// * EnableHWIntrinsic
// * EnableSSE
// * EnableSSE2
// * EnableAES
// * EnablePCLMULQDQ
// * EnableSSE3
// * EnableSSSE3
// * EnableSSE41
// * EnableSSE42
// * EnablePOPCNT
// * EnableAVX
// * EnableFMA
// * EnableAVX2
// * EnableBMI1
// * EnableBMI2
// * EnableLZCNT
//
// `FeatureSIMD` ends up impacting all SIMD support(including `System.Numerics`) but not things
// like `LZCNT`, `BMI1`, or `BMI2`
// `EnableSSE3_4` is a legacy switch that exists for compat and is basically the same as `EnableSSE3`
private const string EnableAES = "COMPlus_EnableAES";
private const string EnableAVX = "COMPlus_EnableAVX";
private const string EnableAVX2 = "COMPlus_EnableAVX2";
private const string EnableBMI1 = "COMPlus_EnableBMI1";
private const string EnableBMI2 = "COMPlus_EnableBMI2";
private const string EnableFMA = "COMPlus_EnableFMA";
private const string EnableHWIntrinsic = "COMPlus_EnableHWIntrinsic";
private const string EnableLZCNT = "COMPlus_EnableLZCNT";
private const string EnablePCLMULQDQ = "COMPlus_EnablePCLMULQDQ";
private const string EnablePOPCNT = "COMPlus_EnablePOPCNT";
private const string EnableSSE = "COMPlus_EnableSSE";
private const string EnableSSE2 = "COMPlus_EnableSSE2";
private const string EnableSSE3 = "COMPlus_EnableSSE3";
private const string EnableSSE3_4 = "COMPlus_EnableSSE3_4";
private const string EnableSSE41 = "COMPlus_EnableSSE41";
private const string EnableSSE42 = "COMPlus_EnableSSE42";
private const string EnableSSSE3 = "COMPlus_EnableSSSE3";
private const string FeatureSIMD = "COMPlus_FeatureSIMD";
public class HwIntrinsics_SSE_AVX : Config
{
public HwIntrinsics_SSE_AVX()
{
this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31)
.WithEnvironmentVariables(
new EnvironmentVariable(EnableHWIntrinsic, Off),
new EnvironmentVariable(FeatureSIMD, Off))
.WithId("1. No HwIntrinsics").AsBaseline());
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31)
.WithId("2. AVX"));
}
if (Sse.IsSupported)
{
this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31)
.WithEnvironmentVariables(new EnvironmentVariable(EnableAVX, Off))
.WithId("3. SSE"));
}
#endif
}
}
}
}

2
tests/ImageSharp.Benchmarks/Config.cs

@ -12,7 +12,7 @@ using BenchmarkDotNet.Jobs;
namespace SixLabors.ImageSharp.Benchmarks
{
public class Config : ManualConfig
public partial class Config : ManualConfig
{
public Config()
{

56
tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs

@ -168,49 +168,27 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion
[Benchmark]
public void PixelConverter_Rgba32_ToArgb32()
{
ref uint sBase = ref Unsafe.As<Rgba32, uint>(ref this.PermutedRunnerRgbaToArgb.Source[0]);
ref uint dBase = ref Unsafe.As<TestArgb, uint>(ref this.PermutedRunnerRgbaToArgb.Dest[0]);
Span<byte> source = MemoryMarshal.Cast<Rgba32, byte>(this.PermutedRunnerRgbaToArgb.Source);
Span<byte> dest = MemoryMarshal.Cast<TestArgb, byte>(this.PermutedRunnerRgbaToArgb.Dest);
for (int i = 0; i < this.Count; i++)
{
uint s = Unsafe.Add(ref sBase, i);
Unsafe.Add(ref dBase, i) = PixelConverter.FromRgba32.ToArgb32(s);
}
}
[Benchmark]
public void PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer()
{
Span<uint> source = MemoryMarshal.Cast<Rgba32, uint>(this.PermutedRunnerRgbaToArgb.Source);
Span<uint> dest = MemoryMarshal.Cast<TestArgb, uint>(this.PermutedRunnerRgbaToArgb.Dest);
source.CopyTo(dest);
ref uint dBase = ref MemoryMarshal.GetReference(dest);
for (int i = 0; i < this.Count; i++)
{
uint s = Unsafe.Add(ref dBase, i);
Unsafe.Add(ref dBase, i) = PixelConverter.FromRgba32.ToArgb32(s);
}
PixelConverter.FromRgba32.ToArgb32(source, dest);
}
/*
RESULTS:
Method | Count | Mean | Error | StdDev | Scaled | ScaledSD |
---------------------------------------------------------- |------ |-----------:|-----------:|-----------:|-------:|---------:|
ByRef | 256 | 328.7 ns | 6.6141 ns | 6.1868 ns | 1.00 | 0.00 |
ByVal | 256 | 322.0 ns | 4.3541 ns | 4.0728 ns | 0.98 | 0.02 |
FromBytes | 256 | 321.5 ns | 3.3499 ns | 3.1335 ns | 0.98 | 0.02 |
InlineShuffle | 256 | 330.7 ns | 4.2525 ns | 3.9778 ns | 1.01 | 0.02 |
PixelConverter_Rgba32_ToArgb32 | 256 | 167.4 ns | 0.6357 ns | 0.5309 ns | 0.51 | 0.01 |
PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer | 256 | 196.6 ns | 0.8929 ns | 0.7915 ns | 0.60 | 0.01 |
| | | | | | |
ByRef | 2048 | 2,534.4 ns | 8.2947 ns | 6.9265 ns | 1.00 | 0.00 |
ByVal | 2048 | 2,638.5 ns | 52.6843 ns | 70.3320 ns | 1.04 | 0.03 |
FromBytes | 2048 | 2,517.2 ns | 40.8055 ns | 38.1695 ns | 0.99 | 0.01 |
InlineShuffle | 2048 | 2,546.5 ns | 21.2506 ns | 19.8778 ns | 1.00 | 0.01 |
PixelConverter_Rgba32_ToArgb32 | 2048 | 1,265.7 ns | 5.1397 ns | 4.5562 ns | 0.50 | 0.00 |
PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer | 2048 | 1,410.3 ns | 11.1939 ns | 9.9231 ns | 0.56 | 0.00 |
*/
| Method | Count | Mean | Error | StdDev | Median | Ratio | RatioSD |
|------------------------------- |------ |------------:|----------:|----------:|------------:|------:|--------:|
| ByRef | 256 | 288.84 ns | 19.601 ns | 52.319 ns | 268.10 ns | 1.00 | 0.00 |
| ByVal | 256 | 267.97 ns | 1.831 ns | 1.713 ns | 267.85 ns | 0.77 | 0.18 |
| FromBytes | 256 | 266.81 ns | 2.427 ns | 2.270 ns | 266.47 ns | 0.76 | 0.18 |
| InlineShuffle | 256 | 291.41 ns | 5.820 ns | 5.444 ns | 290.17 ns | 0.83 | 0.19 |
| PixelConverter_Rgba32_ToArgb32 | 256 | 38.62 ns | 0.431 ns | 0.403 ns | 38.68 ns | 0.11 | 0.03 |
| | | | | | | | |
| ByRef | 2048 | 2,197.69 ns | 15.826 ns | 14.804 ns | 2,197.25 ns | 1.00 | 0.00 |
| ByVal | 2048 | 2,226.81 ns | 44.266 ns | 62.054 ns | 2,197.17 ns | 1.03 | 0.04 |
| FromBytes | 2048 | 2,181.35 ns | 18.033 ns | 16.868 ns | 2,185.97 ns | 0.99 | 0.01 |
| InlineShuffle | 2048 | 2,233.10 ns | 27.673 ns | 24.531 ns | 2,229.78 ns | 1.02 | 0.01 |
| PixelConverter_Rgba32_ToArgb32 | 2048 | 139.90 ns | 2.152 ns | 3.825 ns | 138.70 ns | 0.06 | 0.00 |
*/
}
}

3
tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj

@ -17,6 +17,7 @@
<ItemGroup>
<Compile Include="..\ImageSharp.Tests\TestImages.cs" Link="Tests\TestImages.cs" />
<Compile Include="..\ImageSharp.Tests\TestUtilities\TestEnvironment.cs" Link="Tests\TestEnvironment.cs" />
<Compile Include="..\ImageSharp.Tests\TestUtilities\TestDataGenerator.cs" Link="Tests\TestDataGenerator.cs" />
</ItemGroup>
<ItemGroup>
@ -25,7 +26,7 @@
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Condition="'$(OS)' == 'Windows_NT'" />
<PackageReference Include="Colourful" />
<PackageReference Include="Pfim" />
<PackageReference Include="SharpZipLib" />
<PackageReference Include="SharpZipLib" />
<PackageReference Include="System.Drawing.Common" />
</ItemGroup>

53
tests/ImageSharp.Benchmarks/Processing/HistogramEqualization.cs

@ -0,0 +1,53 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.IO;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;
using SixLabors.ImageSharp.Processing.Processors.Normalization;
using SixLabors.ImageSharp.Tests;
namespace SixLabors.ImageSharp.Benchmarks.Processing
{
[Config(typeof(Config.ShortClr))]
public class HistogramEqualization : BenchmarkBase
{
private Image<Rgba32> image;
[GlobalSetup]
public void ReadImages()
{
if (this.image == null)
{
this.image = Image.Load<Rgba32>(File.OpenRead(Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, TestImages.Jpeg.Baseline.HistogramEqImage)));
}
}
[GlobalCleanup]
public void Cleanup()
{
this.image.Dispose();
}
[Benchmark(Description = "Global Histogram Equalization")]
public void GlobalHistogramEqualization()
{
this.image.Mutate(img => img.HistogramEqualization(new HistogramEqualizationOptions()
{
LuminanceLevels = 256,
Method = HistogramEqualizationMethod.Global
}));
}
[Benchmark(Description = "AdaptiveHistogramEqualization (Tile interpolation)")]
public void AdaptiveHistogramEqualization()
{
this.image.Mutate(img => img.HistogramEqualization(new HistogramEqualizationOptions()
{
LuminanceLevels = 256,
Method = HistogramEqualizationMethod.AdaptiveTileInterpolation
}));
}
}
}

399
tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs

@ -0,0 +1,399 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
namespace SixLabors.ImageSharp.Tests.Common
{
public partial class SimdUtilsTests
{
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy4))]
public void BulkShuffleFloat4Channel(int count)
{
static void RunTest(string serialized)
{
// No need to test multiple shuffle controls as the
// pipeline is always the same.
int size = FeatureTestRunner.Deserialize<int>(serialized);
byte control = default(WZYXShuffle4).Control;
TestShuffleFloat4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, control),
control);
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE);
}
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy4))]
public void BulkShuffleByte4Channel(int count)
{
static void RunTest(string serialized)
{
int size = FeatureTestRunner.Deserialize<int>(serialized);
// These cannot be expressed as a theory as you cannot
// use RemoteExecutor within generic methods nor pass
// IShuffle4 to the generic utils method.
WXYZShuffle4 wxyz = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz),
wxyz.Control);
WZYXShuffle4 wzyx = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx),
wzyx.Control);
YZWXShuffle4 yzwx = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx),
yzwx.Control);
ZYXWShuffle4 zyxw = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw),
zyxw.Control);
var xwyz = new DefaultShuffle4(2, 1, 3, 0);
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz),
xwyz.Control);
var yyyy = new DefaultShuffle4(1, 1, 1, 1);
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy),
yyyy.Control);
var wwww = new DefaultShuffle4(3, 3, 3, 3);
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww),
wwww.Control);
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE);
}
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy3))]
public void BulkShuffleByte3Channel(int count)
{
static void RunTest(string serialized)
{
int size = FeatureTestRunner.Deserialize<int>(serialized);
// These cannot be expressed as a theory as you cannot
// use RemoteExecutor within generic methods nor pass
// IShuffle3 to the generic utils method.
var zyx = new DefaultShuffle3(0, 1, 2);
TestShuffleByte3Channel(
size,
(s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx),
zyx.Control);
var xyz = new DefaultShuffle3(2, 1, 0);
TestShuffleByte3Channel(
size,
(s, d) => SimdUtils.Shuffle3(s.Span, d.Span, xyz),
xyz.Control);
var yyy = new DefaultShuffle3(1, 1, 1);
TestShuffleByte3Channel(
size,
(s, d) => SimdUtils.Shuffle3(s.Span, d.Span, yyy),
yyy.Control);
var zzz = new DefaultShuffle3(2, 2, 2);
TestShuffleByte3Channel(
size,
(s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zzz),
zzz.Control);
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE);
}
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy3))]
public void BulkPad3Shuffle4Channel(int count)
{
static void RunTest(string serialized)
{
int size = FeatureTestRunner.Deserialize<int>(serialized);
// These cannot be expressed as a theory as you cannot
// use RemoteExecutor within generic methods nor pass
// IPad3Shuffle4 to the generic utils method.
XYZWPad3Shuffle4 xyzw = default;
TestPad3Shuffle4Channel(
size,
(s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xyzw),
xyzw.Control);
var xwyz = new DefaultPad3Shuffle4(2, 1, 3, 0);
TestPad3Shuffle4Channel(
size,
(s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xwyz),
xwyz.Control);
var yyyy = new DefaultPad3Shuffle4(1, 1, 1, 1);
TestPad3Shuffle4Channel(
size,
(s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, yyyy),
yyyy.Control);
var wwww = new DefaultPad3Shuffle4(3, 3, 3, 3);
TestPad3Shuffle4Channel(
size,
(s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, wwww),
wwww.Control);
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE);
}
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy4))]
public void BulkShuffle4Slice3Channel(int count)
{
static void RunTest(string serialized)
{
int size = FeatureTestRunner.Deserialize<int>(serialized);
// These cannot be expressed as a theory as you cannot
// use RemoteExecutor within generic methods nor pass
// IShuffle4Slice3 to the generic utils method.
XYZWShuffle4Slice3 xyzw = default;
TestShuffle4Slice3Channel(
size,
(s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xyzw),
xyzw.Control);
var xwyz = new DefaultShuffle4Slice3(2, 1, 3, 0);
TestShuffle4Slice3Channel(
size,
(s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xwyz),
xwyz.Control);
var yyyy = new DefaultShuffle4Slice3(1, 1, 1, 1);
TestShuffle4Slice3Channel(
size,
(s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, yyyy),
yyyy.Control);
var wwww = new DefaultShuffle4Slice3(3, 3, 3, 3);
TestShuffle4Slice3Channel(
size,
(s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, wwww),
wwww.Control);
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE);
}
private static void TestShuffleFloat4Channel(
int count,
Action<Memory<float>, Memory<float>> convert,
byte control)
{
float[] source = new Random(count).GenerateRandomFloatArray(count, 0, 256);
var result = new float[count];
float[] expected = new float[count];
SimdUtils.Shuffle.InverseMmShuffle(
control,
out int p3,
out int p2,
out int p1,
out int p0);
for (int i = 0; i < expected.Length; i += 4)
{
expected[i] = source[p0 + i];
expected[i + 1] = source[p1 + i];
expected[i + 2] = source[p2 + i];
expected[i + 3] = source[p3 + i];
}
convert(source, result);
Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5F));
}
private static void TestShuffleByte4Channel(
int count,
Action<Memory<byte>, Memory<byte>> convert,
byte control)
{
byte[] source = new byte[count];
new Random(count).NextBytes(source);
var result = new byte[count];
byte[] expected = new byte[count];
SimdUtils.Shuffle.InverseMmShuffle(
control,
out int p3,
out int p2,
out int p1,
out int p0);
for (int i = 0; i < expected.Length; i += 4)
{
expected[i] = source[p0 + i];
expected[i + 1] = source[p1 + i];
expected[i + 2] = source[p2 + i];
expected[i + 3] = source[p3 + i];
}
convert(source, result);
Assert.Equal(expected, result);
}
private static void TestShuffleByte3Channel(
int count,
Action<Memory<byte>, Memory<byte>> convert,
byte control)
{
byte[] source = new byte[count];
new Random(count).NextBytes(source);
var result = new byte[count];
byte[] expected = new byte[count];
SimdUtils.Shuffle.InverseMmShuffle(
control,
out int _,
out int p2,
out int p1,
out int p0);
for (int i = 0; i < expected.Length; i += 3)
{
expected[i] = source[p0 + i];
expected[i + 1] = source[p1 + i];
expected[i + 2] = source[p2 + i];
}
convert(source, result);
Assert.Equal(expected, result);
}
private static void TestPad3Shuffle4Channel(
int count,
Action<Memory<byte>, Memory<byte>> convert,
byte control)
{
byte[] source = new byte[count];
new Random(count).NextBytes(source);
var result = new byte[count * 4 / 3];
byte[] expected = new byte[result.Length];
SimdUtils.Shuffle.InverseMmShuffle(
control,
out int p3,
out int p2,
out int p1,
out int p0);
for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3)
{
expected[p0 + i] = source[j];
expected[p1 + i] = source[j + 1];
expected[p2 + i] = source[j + 2];
expected[p3 + i] = byte.MaxValue;
}
Span<byte> temp = stackalloc byte[4];
for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3)
{
temp[0] = source[j];
temp[1] = source[j + 1];
temp[2] = source[j + 2];
temp[3] = byte.MaxValue;
expected[i] = temp[p0];
expected[i + 1] = temp[p1];
expected[i + 2] = temp[p2];
expected[i + 3] = temp[p3];
}
convert(source, result);
for (int i = 0; i < expected.Length; i++)
{
Assert.Equal(expected[i], result[i]);
}
Assert.Equal(expected, result);
}
private static void TestShuffle4Slice3Channel(
int count,
Action<Memory<byte>, Memory<byte>> convert,
byte control)
{
byte[] source = new byte[count];
new Random(count).NextBytes(source);
var result = new byte[count * 3 / 4];
byte[] expected = new byte[result.Length];
SimdUtils.Shuffle.InverseMmShuffle(
control,
out int _,
out int p2,
out int p1,
out int p0);
for (int i = 0, j = 0; i < expected.Length; i += 3, j += 4)
{
expected[i] = source[p0 + j];
expected[i + 1] = source[p1 + j];
expected[i + 2] = source[p2 + j];
}
convert(source, result);
for (int i = 0; i < expected.Length; i++)
{
Assert.Equal(expected[i], result[i]);
}
Assert.Equal(expected, result);
}
}
}

38
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -7,13 +7,13 @@ using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Common.Tuples;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
using Xunit.Abstractions;
namespace SixLabors.ImageSharp.Tests.Common
{
public class SimdUtilsTests
public partial class SimdUtilsTests
{
private ITestOutputHelper Output { get; }
@ -163,7 +163,7 @@ namespace SixLabors.ImageSharp.Tests.Common
public static readonly TheoryData<int> ArraySizesDivisibleBy8 = new TheoryData<int> { 0, 8, 16, 1024 };
public static readonly TheoryData<int> ArraySizesDivisibleBy4 = new TheoryData<int> { 0, 4, 8, 28, 1020 };
public static readonly TheoryData<int> ArraySizesDivisibleBy3 = new TheoryData<int> { 0, 3, 9, 36, 957 };
public static readonly TheoryData<int> ArraySizesDivisibleBy32 = new TheoryData<int> { 0, 32, 512 };
public static readonly TheoryData<int> ArbitraryArraySizes =
@ -204,6 +204,25 @@ namespace SixLabors.ImageSharp.Tests.Common
(s, d) => SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(s.Span, d.Span));
}
#if SUPPORTS_RUNTIME_INTRINSICS
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy32))]
public void HwIntrinsics_BulkConvertByteToNormalizedFloat(int count)
{
static void RunTest(string serialized)
{
TestImpl_BulkConvertByteToNormalizedFloat(
FeatureTestRunner.Deserialize<int>(serialized),
(s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span));
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41);
}
#endif
[Theory]
[MemberData(nameof(ArbitraryArraySizes))]
public void BulkConvertByteToNormalizedFloat(int count)
@ -281,16 +300,19 @@ namespace SixLabors.ImageSharp.Tests.Common
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy32))]
public void Avx2_BulkConvertNormalizedFloatToByteClampOverflows(int count)
public void HwIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count)
{
if (!System.Runtime.Intrinsics.X86.Avx2.IsSupported)
static void RunTest(string serialized)
{
return;
TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
FeatureTestRunner.Deserialize<int>(serialized),
(s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span));
}
TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
count,
(s, d) => SimdUtils.Avx2Intrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span));
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
}
#endif

36
tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs

@ -39,22 +39,32 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp
};
[Theory]
[WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32, false)]
[WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32, true)]
public void BmpDecoder_CanDecode_MiscellaneousBitmaps<TPixel>(TestImageProvider<TPixel> provider, bool enforceDiscontiguousBuffers)
[WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32)]
public void BmpDecoder_CanDecode_MiscellaneousBitmaps<TPixel>(TestImageProvider<TPixel> provider)
where TPixel : unmanaged, IPixel<TPixel>
{
using Image<TPixel> image = provider.GetImage(BmpDecoder);
image.DebugSave(provider);
if (TestEnvironment.IsWindows)
{
image.CompareToOriginal(provider);
}
}
[Theory]
[WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32)]
public void BmpDecoder_CanDecode_MiscellaneousBitmaps_WithLimitedAllocatorBufferCapacity(
TestImageProvider<Rgba32> provider)
{
static void RunTest(string providerDump, string nonContiguousBuffersStr)
{
TestImageProvider<TPixel> provider = BasicSerializer.Deserialize<TestImageProvider<TPixel>>(providerDump);
TestImageProvider<Rgba32> provider = BasicSerializer.Deserialize<TestImageProvider<Rgba32>>(providerDump);
if (!string.IsNullOrEmpty(nonContiguousBuffersStr))
{
provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100);
}
provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100);
using Image<TPixel> image = provider.GetImage(BmpDecoder);
image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr);
using Image<Rgba32> image = provider.GetImage(BmpDecoder);
image.DebugSave(provider, nonContiguousBuffersStr);
if (TestEnvironment.IsWindows)
{
@ -66,7 +76,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp
RemoteExecutor.Invoke(
RunTest,
providerDump,
enforceDiscontiguousBuffers ? "Disco" : string.Empty)
"Disco")
.Dispose();
}
@ -348,7 +358,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp
using (Image<TPixel> image = provider.GetImage(BmpDecoder))
{
image.DebugSave(provider);
image.CompareToOriginal(provider);
// Do not validate. Reference files will fail validation.
image.CompareToOriginal(provider, new MagickReferenceDecoder(false));
}
}

22
tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs

@ -10,7 +10,7 @@ using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;
using SixLabors.ImageSharp.Processing.Processors.Quantization;
using SixLabors.ImageSharp.Tests.TestUtilities.ImageComparison;
using SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs;
using Xunit;
using Xunit.Abstractions;
@ -200,10 +200,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp
Quantizer = new WuQuantizer()
};
string actualOutputFile = provider.Utility.SaveTestOutputFile(image, "bmp", encoder, appendPixelTypeToFileName: false);
// Use the default decoder to test our encoded image. This verifies the content.
// We do not verify the reference image though as some are invalid.
IImageDecoder referenceDecoder = TestEnvironment.GetReferenceDecoder(actualOutputFile);
using (var referenceImage = Image.Load<TPixel>(actualOutputFile, referenceDecoder))
{
referenceImage.CompareToReferenceOutput(ImageComparer.TolerantPercentage(0.01f), provider, extension: "bmp", appendPixelTypeToFileName: false);
referenceImage.CompareToReferenceOutput(
ImageComparer.TolerantPercentage(0.01f),
provider,
extension: "bmp",
appendPixelTypeToFileName: false,
decoder: new MagickReferenceDecoder(false));
}
}
}
@ -226,10 +234,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp
Quantizer = new OctreeQuantizer()
};
string actualOutputFile = provider.Utility.SaveTestOutputFile(image, "bmp", encoder, appendPixelTypeToFileName: false);
// Use the default decoder to test our encoded image. This verifies the content.
// We do not verify the reference image though as some are invalid.
IImageDecoder referenceDecoder = TestEnvironment.GetReferenceDecoder(actualOutputFile);
using (var referenceImage = Image.Load<TPixel>(actualOutputFile, referenceDecoder))
{
referenceImage.CompareToReferenceOutput(ImageComparer.TolerantPercentage(0.01f), provider, extension: "bmp", appendPixelTypeToFileName: false);
referenceImage.CompareToReferenceOutput(
ImageComparer.TolerantPercentage(0.01f),
provider,
extension: "bmp",
appendPixelTypeToFileName: false,
decoder: new MagickReferenceDecoder(false));
}
}
}

11
tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs

@ -198,17 +198,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Gif
[Theory]
[WithFile(TestImages.Gif.Giphy, PixelTypes.Rgba32)]
[WithFile(TestImages.Gif.Kumin, PixelTypes.Rgba32)]
public void GifDecoder_CanDecode_WithLimitedAllocatorBufferCapacity<TPixel>(TestImageProvider<TPixel> provider)
where TPixel : unmanaged, IPixel<TPixel>
public void GifDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(
TestImageProvider<Rgba32> provider)
{
static void RunTest(string providerDump, string nonContiguousBuffersStr)
{
TestImageProvider<TPixel> provider = BasicSerializer.Deserialize<TestImageProvider<TPixel>>(providerDump);
TestImageProvider<Rgba32> provider
= BasicSerializer.Deserialize<TestImageProvider<Rgba32>>(providerDump);
provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100);
using Image<TPixel> image = provider.GetImage(GifDecoder);
image.DebugSave(provider);
using Image<Rgba32> image = provider.GetImage(GifDecoder);
image.DebugSave(provider, nonContiguousBuffersStr);
image.CompareToOriginal(provider);
}

186
tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

@ -8,7 +8,7 @@ using System.Diagnostics;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
using Xunit.Abstractions;
@ -45,20 +45,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
this.Measure(
Times,
() =>
{
var block = default(Block8x8F);
for (int i = 0; i < Block8x8F.Size; i++)
{
var block = default(Block8x8F);
for (int i = 0; i < Block8x8F.Size; i++)
{
block[i] = i;
}
sum = 0;
for (int i = 0; i < Block8x8F.Size; i++)
{
sum += block[i];
}
});
block[i] = i;
}
sum = 0;
for (int i = 0; i < Block8x8F.Size; i++)
{
sum += block[i];
}
});
Assert.Equal(sum, 64f * 63f * 0.5f);
}
@ -70,20 +70,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
this.Measure(
Times,
() =>
{
// Block8x8F block = new Block8x8F();
float[] block = new float[64];
for (int i = 0; i < Block8x8F.Size; i++)
{
// Block8x8F block = new Block8x8F();
float[] block = new float[64];
for (int i = 0; i < Block8x8F.Size; i++)
{
block[i] = i;
}
sum = 0;
for (int i = 0; i < Block8x8F.Size; i++)
{
sum += block[i];
}
});
block[i] = i;
}
sum = 0;
for (int i = 0; i < Block8x8F.Size; i++)
{
sum += block[i];
}
});
Assert.Equal(sum, 64f * 63f * 0.5f);
}
@ -101,11 +101,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
this.Measure(
Times,
() =>
{
var b = default(Block8x8F);
b.LoadFrom(data);
b.ScaledCopyTo(mirror);
});
{
var b = default(Block8x8F);
b.LoadFrom(data);
b.ScaledCopyTo(mirror);
});
Assert.Equal(data, mirror);
@ -126,11 +126,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
this.Measure(
Times,
() =>
{
var b = default(Block8x8F);
Block8x8F.LoadFrom(&b, data);
Block8x8F.ScaledCopyTo(&b, mirror);
});
{
var b = default(Block8x8F);
Block8x8F.LoadFrom(&b, data);
Block8x8F.ScaledCopyTo(&b, mirror);
});
Assert.Equal(data, mirror);
@ -151,11 +151,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
this.Measure(
Times,
() =>
{
var v = default(Block8x8F);
v.LoadFrom(data);
v.ScaledCopyTo(mirror);
});
{
var v = default(Block8x8F);
v.LoadFrom(data);
v.ScaledCopyTo(mirror);
});
Assert.Equal(data, mirror);
@ -165,19 +165,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[Fact]
public void TransposeInto()
{
float[] expected = Create8x8FloatData();
ReferenceImplementations.Transpose8x8(expected);
static void RunTest()
{
float[] expected = Create8x8FloatData();
ReferenceImplementations.Transpose8x8(expected);
var source = default(Block8x8F);
source.LoadFrom(Create8x8FloatData());
var source = default(Block8x8F);
source.LoadFrom(Create8x8FloatData());
var dest = default(Block8x8F);
source.TransposeInto(ref dest);
var dest = default(Block8x8F);
source.TransposeInto(ref dest);
float[] actual = new float[64];
dest.ScaledCopyTo(actual);
float[] actual = new float[64];
dest.ScaledCopyTo(actual);
Assert.Equal(expected, actual);
Assert.Equal(expected, actual);
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX);
}
private class BufferHolder
@ -228,7 +235,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
this.PrintLinearData(input);
Block8x8F dest = block;
dest.NormalizeColorsInplace(255);
dest.NormalizeColorsInPlace(255);
float[] array = new float[64];
dest.ScaledCopyTo(array);
@ -253,11 +260,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Block8x8F source = CreateRandomFloatBlock(-200, 200, seed);
Block8x8F expected = source;
expected.NormalizeColorsInplace(255);
expected.RoundInplace();
expected.NormalizeColorsInPlace(255);
expected.RoundInPlace();
Block8x8F actual = source;
actual.NormalizeColorsAndRoundInplaceVector8(255);
actual.NormalizeColorsAndRoundInPlaceVector8(255);
this.Output.WriteLine(expected.ToString());
this.Output.WriteLine(actual.ToString());
@ -318,12 +325,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public void RoundInplaceSlow(int seed)
public void RoundInPlaceSlow(int seed)
{
Block8x8F s = CreateRandomFloatBlock(-500, 500, seed);
Block8x8F d = s;
d.RoundInplace();
d.RoundInPlace();
this.Output.WriteLine(s.ToString());
this.Output.WriteLine(d.ToString());
@ -338,19 +345,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
}
[Fact]
public void MultiplyInplace_ByOtherBlock()
public void MultiplyInPlace_ByOtherBlock()
{
Block8x8F original = CreateRandomFloatBlock(-500, 500, 42);
Block8x8F m = CreateRandomFloatBlock(-500, 500, 42);
static void RunTest()
{
Block8x8F original = CreateRandomFloatBlock(-500, 500, 42);
Block8x8F m = CreateRandomFloatBlock(-500, 500, 42);
Block8x8F actual = original;
Block8x8F actual = original;
actual.MultiplyInplace(ref m);
actual.MultiplyInPlace(ref m);
for (int i = 0; i < Block8x8F.Size; i++)
{
Assert.Equal(original[i] * m[i], actual[i]);
for (int i = 0; i < Block8x8F.Size; i++)
{
Assert.Equal(original[i] * m[i], actual[i]);
}
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX);
}
[Theory]
@ -390,23 +404,51 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
ReferenceImplementations.DequantizeBlock(&expected, &qt, unzig.Data);
actual.MultiplyInplace(ref zigQt);
actual.MultiplyInPlace(ref zigQt);
this.CompareBlocks(expected, actual, 0);
}
[Fact]
public void MultiplyInplace_ByScalar()
public void AddToAllInPlace()
{
Block8x8F original = CreateRandomFloatBlock(-500, 500);
static void RunTest()
{
Block8x8F original = CreateRandomFloatBlock(-500, 500);
Block8x8F actual = original;
actual.MultiplyInplace(42f);
Block8x8F actual = original;
actual.AddInPlace(42f);
for (int i = 0; i < 64; i++)
for (int i = 0; i < 64; i++)
{
Assert.Equal(original[i] + 42f, actual[i]);
}
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX);
}
[Fact]
public void MultiplyInPlace_ByScalar()
{
static void RunTest()
{
Assert.Equal(original[i] * 42f, actual[i]);
Block8x8F original = CreateRandomFloatBlock(-500, 500);
Block8x8F actual = original;
actual.MultiplyInPlace(42f);
for (int i = 0; i < 64; i++)
{
Assert.Equal(original[i] * 42f, actual[i]);
}
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX);
}
[Fact]

490
tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs

@ -22,6 +22,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
private static readonly ApproximateColorSpaceComparer ColorSpaceComparer = new ApproximateColorSpaceComparer(Precision);
// int inputBufferLength, int resultBufferLength, int seed
public static readonly TheoryData<int, int, int> CommonConversionData =
new TheoryData<int, int, int>
{
@ -41,9 +42,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[Theory]
[MemberData(nameof(CommonConversionData))]
public void ConvertFromYCbCrBasic(int inputBufferLength, int resultBufferLength, int seed)
public void FromYCbCrBasic(int inputBufferLength, int resultBufferLength, int seed)
{
ValidateRgbToYCbCrConversion(
ValidateConversion(
new JpegColorConverter.FromYCbCrBasic(8),
3,
inputBufferLength,
@ -51,44 +52,36 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
seed);
}
private static void ValidateYCbCr(in JpegColorConverter.ComponentValues values, Vector4[] result, int i)
{
float y = values.Component0[i];
float cb = values.Component1[i];
float cr = values.Component2[i];
var ycbcr = new YCbCr(y, cb, cr);
Vector4 rgba = result[i];
var actual = new Rgb(rgba.X, rgba.Y, rgba.Z);
var expected = ColorSpaceConverter.ToRgb(ycbcr);
Assert.Equal(expected, actual, ColorSpaceComparer);
Assert.Equal(1, rgba.W);
}
[Theory]
[InlineData(64, 1)]
[InlineData(16, 2)]
[InlineData(8, 3)]
public void FromYCbCrSimd_ConvertCore(int size, int seed)
[MemberData(nameof(CommonConversionData))]
public void FromYCbCrVector4(int inputBufferLength, int resultBufferLength, int seed)
{
JpegColorConverter.ComponentValues values = CreateRandomValues(3, size, seed);
var result = new Vector4[size];
JpegColorConverter.FromYCbCrSimd.ConvertCore(values, result, 255, 128);
for (int i = 0; i < size; i++)
if (!SimdUtils.HasVector4)
{
ValidateYCbCr(values, result, i);
this.Output.WriteLine("No SSE present, skipping test!");
return;
}
ValidateConversion(
new JpegColorConverter.FromYCbCrVector4(8),
3,
inputBufferLength,
resultBufferLength,
seed);
}
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromYCbCrSimd(int inputBufferLength, int resultBufferLength, int seed)
public void FromYCbCrVector8(int inputBufferLength, int resultBufferLength, int seed)
{
ValidateRgbToYCbCrConversion(
new JpegColorConverter.FromYCbCrSimd(8),
if (!SimdUtils.HasVector8)
{
this.Output.WriteLine("No AVX2 present, skipping test!");
return;
}
ValidateConversion(
new JpegColorConverter.FromYCbCrVector8(8),
3,
inputBufferLength,
resultBufferLength,
@ -97,17 +90,16 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromYCbCrSimdAvx2(int inputBufferLength, int resultBufferLength, int seed)
public void FromYCbCrAvx2(int inputBufferLength, int resultBufferLength, int seed)
{
if (!SimdUtils.HasVector8)
if (!SimdUtils.HasAvx2)
{
this.Output.WriteLine("No AVX2 present, skipping test!");
return;
}
// JpegColorConverter.FromYCbCrSimdAvx2.LogPlz = s => this.Output.WriteLine(s);
ValidateRgbToYCbCrConversion(
new JpegColorConverter.FromYCbCrSimdVector8(8),
ValidateConversion(
new JpegColorConverter.FromYCbCrAvx2(8),
3,
inputBufferLength,
resultBufferLength,
@ -116,7 +108,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[Theory]
[MemberData(nameof(CommonConversionData))]
public void ConvertFromYCbCr_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed)
public void FromYCbCr_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed)
{
ValidateConversion(
JpegColorSpace.YCbCr,
@ -126,149 +118,251 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
seed);
}
// Benchmark, for local execution only
// [Theory]
// [InlineData(false)]
// [InlineData(true)]
public void BenchmarkYCbCr(bool simd)
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromCmykBasic(int inputBufferLength, int resultBufferLength, int seed)
{
int count = 2053;
int times = 50000;
JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1);
var result = new Vector4[count];
ValidateConversion(
new JpegColorConverter.FromCmykBasic(8),
4,
inputBufferLength,
resultBufferLength,
seed);
}
JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrSimd(8) : new JpegColorConverter.FromYCbCrBasic(8);
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromCmykVector8(int inputBufferLength, int resultBufferLength, int seed)
{
if (!SimdUtils.HasVector8)
{
this.Output.WriteLine("No AVX2 present, skipping test!");
return;
}
// Warm up:
converter.ConvertToRgba(values, result);
ValidateConversion(
new JpegColorConverter.FromCmykVector8(8),
4,
inputBufferLength,
resultBufferLength,
seed);
}
using (new MeasureGuard(this.Output, $"{converter.GetType().Name} x {times}"))
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromCmykAvx2(int inputBufferLength, int resultBufferLength, int seed)
{
if (!SimdUtils.HasAvx2)
{
for (int i = 0; i < times; i++)
{
converter.ConvertToRgba(values, result);
}
this.Output.WriteLine("No AVX2 present, skipping test!");
return;
}
ValidateConversion(
new JpegColorConverter.FromCmykAvx2(8),
4,
inputBufferLength,
resultBufferLength,
seed);
}
[Theory]
[MemberData(nameof(CommonConversionData))]
public void ConvertFromCmyk(int inputBufferLength, int resultBufferLength, int seed)
public void FromCmyk_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed)
{
var v = new Vector4(0, 0, 0, 1F);
var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F);
var converter = JpegColorConverter.GetConverter(JpegColorSpace.Cmyk, 8);
JpegColorConverter.ComponentValues values = CreateRandomValues(4, inputBufferLength, seed);
var result = new Vector4[resultBufferLength];
ValidateConversion(
JpegColorSpace.Cmyk,
4,
inputBufferLength,
resultBufferLength,
seed);
}
converter.ConvertToRgba(values, result);
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromGrayscaleBasic(int inputBufferLength, int resultBufferLength, int seed)
{
ValidateConversion(
new JpegColorConverter.FromGrayscaleBasic(8),
1,
inputBufferLength,
resultBufferLength,
seed);
}
for (int i = 0; i < resultBufferLength; i++)
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromGrayscaleAvx2(int inputBufferLength, int resultBufferLength, int seed)
{
if (!SimdUtils.HasAvx2)
{
float c = values.Component0[i];
float m = values.Component1[i];
float y = values.Component2[i];
float k = values.Component3[i] / 255F;
this.Output.WriteLine("No AVX2 present, skipping test!");
return;
}
v.X = c * k;
v.Y = m * k;
v.Z = y * k;
v.W = 1F;
ValidateConversion(
new JpegColorConverter.FromGrayscaleAvx2(8),
1,
inputBufferLength,
resultBufferLength,
seed);
}
v *= scale;
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromGraysacle_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed)
{
ValidateConversion(
JpegColorSpace.Grayscale,
1,
inputBufferLength,
resultBufferLength,
seed);
}
Vector4 rgba = result[i];
var actual = new Rgb(rgba.X, rgba.Y, rgba.Z);
var expected = new Rgb(v.X, v.Y, v.Z);
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromRgbBasic(int inputBufferLength, int resultBufferLength, int seed)
{
ValidateConversion(
new JpegColorConverter.FromRgbBasic(8),
3,
inputBufferLength,
resultBufferLength,
seed);
}
Assert.Equal(expected, actual);
Assert.Equal(1, rgba.W);
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromRgbVector8(int inputBufferLength, int resultBufferLength, int seed)
{
if (!SimdUtils.HasVector8)
{
this.Output.WriteLine("No AVX2 present, skipping test!");
return;
}
ValidateConversion(
new JpegColorConverter.FromRgbVector8(8),
3,
inputBufferLength,
resultBufferLength,
seed);
}
[Theory]
[MemberData(nameof(CommonConversionData))]
public void ConvertFromGrayScale(int inputBufferLength, int resultBufferLength, int seed)
public void FromRgbAvx2(int inputBufferLength, int resultBufferLength, int seed)
{
var converter = JpegColorConverter.GetConverter(JpegColorSpace.Grayscale, 8);
JpegColorConverter.ComponentValues values = CreateRandomValues(1, inputBufferLength, seed);
var result = new Vector4[resultBufferLength];
if (!SimdUtils.HasAvx2)
{
this.Output.WriteLine("No AVX2 present, skipping test!");
return;
}
converter.ConvertToRgba(values, result);
ValidateConversion(
new JpegColorConverter.FromRgbAvx2(8),
3,
inputBufferLength,
resultBufferLength,
seed);
}
for (int i = 0; i < resultBufferLength; i++)
{
float y = values.Component0[i];
Vector4 rgba = result[i];
var actual = new Rgb(rgba.X, rgba.Y, rgba.Z);
var expected = new Rgb(y / 255F, y / 255F, y / 255F);
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromRgb_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed)
{
ValidateConversion(
JpegColorSpace.RGB,
3,
inputBufferLength,
resultBufferLength,
seed);
}
Assert.Equal(expected, actual, ColorSpaceComparer);
Assert.Equal(1, rgba.W);
}
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromYccKBasic(int inputBufferLength, int resultBufferLength, int seed)
{
ValidateConversion(
new JpegColorConverter.FromYccKBasic(8),
4,
inputBufferLength,
resultBufferLength,
seed);
}
[Theory]
[MemberData(nameof(CommonConversionData))]
public void ConvertFromRgb(int inputBufferLength, int resultBufferLength, int seed)
public void FromYccKVector8(int inputBufferLength, int resultBufferLength, int seed)
{
var converter = JpegColorConverter.GetConverter(JpegColorSpace.RGB, 8);
JpegColorConverter.ComponentValues values = CreateRandomValues(3, inputBufferLength, seed);
var result = new Vector4[resultBufferLength];
if (!SimdUtils.HasVector8)
{
this.Output.WriteLine("No AVX2 present, skipping test!");
return;
}
converter.ConvertToRgba(values, result);
ValidateConversion(
new JpegColorConverter.FromYccKVector8(8),
4,
inputBufferLength,
resultBufferLength,
seed);
}
for (int i = 0; i < resultBufferLength; i++)
[Theory]
[MemberData(nameof(CommonConversionData))]
public void FromYccKAvx2(int inputBufferLength, int resultBufferLength, int seed)
{
if (!SimdUtils.HasAvx2)
{
float r = values.Component0[i];
float g = values.Component1[i];
float b = values.Component2[i];
Vector4 rgba = result[i];
var actual = new Rgb(rgba.X, rgba.Y, rgba.Z);
var expected = new Rgb(r / 255F, g / 255F, b / 255F);
Assert.Equal(expected, actual, ColorSpaceComparer);
Assert.Equal(1, rgba.W);
this.Output.WriteLine("No AVX2 present, skipping test!");
return;
}
ValidateConversion(
new JpegColorConverter.FromYccKAvx2(8),
4,
inputBufferLength,
resultBufferLength,
seed);
}
[Theory]
[MemberData(nameof(CommonConversionData))]
public void ConvertFromYcck(int inputBufferLength, int resultBufferLength, int seed)
public void FromYcck_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed)
{
var v = new Vector4(0, 0, 0, 1F);
var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F);
ValidateConversion(
JpegColorSpace.Ycck,
4,
inputBufferLength,
resultBufferLength,
seed);
}
var converter = JpegColorConverter.GetConverter(JpegColorSpace.Ycck, 8);
JpegColorConverter.ComponentValues values = CreateRandomValues(4, inputBufferLength, seed);
var result = new Vector4[resultBufferLength];
// Benchmark, for local execution only
// [Theory]
// [InlineData(false)]
// [InlineData(true)]
public void BenchmarkYCbCr(bool simd)
{
int count = 2053;
int times = 50000;
JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1);
var result = new Vector4[count];
JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector4(8) : new JpegColorConverter.FromYCbCrBasic(8);
// Warm up:
converter.ConvertToRgba(values, result);
for (int i = 0; i < resultBufferLength; i++)
using (new MeasureGuard(this.Output, $"{converter.GetType().Name} x {times}"))
{
float y = values.Component0[i];
float cb = values.Component1[i] - 128F;
float cr = values.Component2[i] - 128F;
float k = values.Component3[i] / 255F;
v.X = (255F - (float)Math.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k;
v.Y = (255F - (float)Math.Round(
y - (0.344136F * cb) - (0.714136F * cr),
MidpointRounding.AwayFromZero)) * k;
v.Z = (255F - (float)Math.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k;
v.W = 1F;
v *= scale;
Vector4 rgba = result[i];
var actual = new Rgb(rgba.X, rgba.Y, rgba.Z);
var expected = new Rgb(v.X, v.Y, v.Z);
Assert.Equal(expected, actual, ColorSpaceComparer);
Assert.Equal(1, rgba.W);
for (int i = 0; i < times; i++)
{
converter.ConvertToRgba(values, result);
}
}
}
@ -283,7 +377,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
var buffers = new Buffer2D<float>[componentCount];
for (int i = 0; i < componentCount; i++)
{
float[] values = new float[inputBufferLength];
var values = new float[inputBufferLength];
for (int j = 0; j < inputBufferLength; j++)
{
@ -306,7 +400,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
int resultBufferLength,
int seed)
{
ValidateRgbToYCbCrConversion(
ValidateConversion(
JpegColorConverter.GetConverter(colorSpace, 8),
componentCount,
inputBufferLength,
@ -314,7 +408,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
seed);
}
private static void ValidateRgbToYCbCrConversion(
private static void ValidateConversion(
JpegColorConverter converter,
int componentCount,
int inputBufferLength,
@ -328,8 +422,128 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
for (int i = 0; i < resultBufferLength; i++)
{
ValidateYCbCr(values, result, i);
Validate(converter.ColorSpace, values, result, i);
}
}
private static void Validate(
JpegColorSpace colorSpace,
in JpegColorConverter.ComponentValues values,
Vector4[] result,
int i)
{
switch (colorSpace)
{
case JpegColorSpace.Grayscale:
ValidateGrayScale(values, result, i);
break;
case JpegColorSpace.Ycck:
ValidateCyyK(values, result, i);
break;
case JpegColorSpace.Cmyk:
ValidateCmyk(values, result, i);
break;
case JpegColorSpace.RGB:
ValidateRgb(values, result, i);
break;
case JpegColorSpace.YCbCr:
ValidateYCbCr(values, result, i);
break;
default:
Assert.True(false, $"Colorspace {colorSpace} not supported!");
break;
}
}
private static void ValidateYCbCr(in JpegColorConverter.ComponentValues values, Vector4[] result, int i)
{
float y = values.Component0[i];
float cb = values.Component1[i];
float cr = values.Component2[i];
var ycbcr = new YCbCr(y, cb, cr);
Vector4 rgba = result[i];
var actual = new Rgb(rgba.X, rgba.Y, rgba.Z);
var expected = ColorSpaceConverter.ToRgb(ycbcr);
Assert.Equal(expected, actual, ColorSpaceComparer);
Assert.Equal(1, rgba.W);
}
private static void ValidateCyyK(in JpegColorConverter.ComponentValues values, Vector4[] result, int i)
{
var v = new Vector4(0, 0, 0, 1F);
var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F);
float y = values.Component0[i];
float cb = values.Component1[i] - 128F;
float cr = values.Component2[i] - 128F;
float k = values.Component3[i] / 255F;
v.X = (255F - (float)Math.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k;
v.Y = (255F - (float)Math.Round(
y - (0.344136F * cb) - (0.714136F * cr),
MidpointRounding.AwayFromZero)) * k;
v.Z = (255F - (float)Math.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k;
v.W = 1F;
v *= scale;
Vector4 rgba = result[i];
var actual = new Rgb(rgba.X, rgba.Y, rgba.Z);
var expected = new Rgb(v.X, v.Y, v.Z);
Assert.Equal(expected, actual, ColorSpaceComparer);
Assert.Equal(1, rgba.W);
}
private static void ValidateRgb(in JpegColorConverter.ComponentValues values, Vector4[] result, int i)
{
float r = values.Component0[i];
float g = values.Component1[i];
float b = values.Component2[i];
Vector4 rgba = result[i];
var actual = new Rgb(rgba.X, rgba.Y, rgba.Z);
var expected = new Rgb(r / 255F, g / 255F, b / 255F);
Assert.Equal(expected, actual, ColorSpaceComparer);
Assert.Equal(1, rgba.W);
}
private static void ValidateGrayScale(in JpegColorConverter.ComponentValues values, Vector4[] result, int i)
{
float y = values.Component0[i];
Vector4 rgba = result[i];
var actual = new Rgb(rgba.X, rgba.Y, rgba.Z);
var expected = new Rgb(y / 255F, y / 255F, y / 255F);
Assert.Equal(expected, actual, ColorSpaceComparer);
Assert.Equal(1, rgba.W);
}
private static void ValidateCmyk(in JpegColorConverter.ComponentValues values, Vector4[] result, int i)
{
var v = new Vector4(0, 0, 0, 1F);
var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F);
float c = values.Component0[i];
float m = values.Component1[i];
float y = values.Component2[i];
float k = values.Component3[i] / 255F;
v.X = c * k;
v.Y = m * k;
v.Z = y * k;
v.W = 1F;
v *= scale;
Vector4 rgba = result[i];
var actual = new Rgb(rgba.X, rgba.Y, rgba.Z);
var expected = new Rgb(v.X, v.Y, v.Z);
Assert.Equal(expected, actual, ColorSpaceComparer);
Assert.Equal(1, rgba.W);
}
}
}

33
tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs

@ -14,22 +14,32 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
public const string DecodeProgressiveJpegOutputName = "DecodeProgressiveJpeg";
[Theory]
[WithFileCollection(nameof(ProgressiveTestJpegs), PixelTypes.Rgba32, false)]
[WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32, true)]
public void DecodeProgressiveJpeg<TPixel>(TestImageProvider<TPixel> provider, bool enforceDiscontiguousBuffers)
[WithFileCollection(nameof(ProgressiveTestJpegs), PixelTypes.Rgba32)]
public void DecodeProgressiveJpeg<TPixel>(TestImageProvider<TPixel> provider)
where TPixel : unmanaged, IPixel<TPixel>
{
using Image<TPixel> image = provider.GetImage(JpegDecoder);
image.DebugSave(provider);
provider.Utility.TestName = DecodeProgressiveJpegOutputName;
image.CompareToReferenceOutput(
GetImageComparer(provider),
provider,
appendPixelTypeToFileName: false);
}
[Theory]
[WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32)]
public void DecodeProgressiveJpeg_WithLimitedAllocatorBufferCapacity(TestImageProvider<Rgba32> provider)
{
static void RunTest(string providerDump, string nonContiguousBuffersStr)
{
TestImageProvider<TPixel> provider =
BasicSerializer.Deserialize<TestImageProvider<TPixel>>(providerDump);
TestImageProvider<Rgba32> provider =
BasicSerializer.Deserialize<TestImageProvider<Rgba32>>(providerDump);
if (!string.IsNullOrEmpty(nonContiguousBuffersStr))
{
provider.LimitAllocatorBufferCapacity().InBytesSqrt(200);
}
provider.LimitAllocatorBufferCapacity().InBytesSqrt(200);
using Image<TPixel> image = provider.GetImage(JpegDecoder);
using Image<Rgba32> image = provider.GetImage(JpegDecoder);
image.DebugSave(provider, nonContiguousBuffersStr);
provider.Utility.TestName = DecodeProgressiveJpegOutputName;
@ -44,8 +54,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
RemoteExecutor.Invoke(
RunTest,
providerDump,
enforceDiscontiguousBuffers ? "Disco" : string.Empty)
.Dispose();
"Disco").Dispose();
}
}
}

35
tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs

@ -129,10 +129,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[Theory]
[InlineData(TestImages.Jpeg.Baseline.Jpeg420Small, 0)]
[InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 1)]
[InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 10)]
[InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 15)]
[InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 30)]
[InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 1)]
[InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 10)]
[InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 15)]
[InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 30)]
public async Task Decode_IsCancellable(string fileName, int cancellationDelayMs)
{
@ -141,17 +141,32 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
TestEnvironment.InputImagesDirectoryFullPath,
fileName);
var cts = new CancellationTokenSource();
if (cancellationDelayMs == 0)
{
cts.Cancel();
}
else
const int NumberOfRuns = 5;
for (int i = 0; i < NumberOfRuns; i++)
{
cts.CancelAfter(cancellationDelayMs);
var cts = new CancellationTokenSource();
if (cancellationDelayMs == 0)
{
cts.Cancel();
}
else
{
cts.CancelAfter(cancellationDelayMs);
}
try
{
using var image = await Image.LoadAsync(hugeFile, cts.Token);
}
catch (TaskCanceledException)
{
// Succesfully observed a cancellation
return;
}
}
await Assert.ThrowsAsync<TaskCanceledException>(() => Image.LoadAsync(hugeFile, cts.Token));
throw new Exception($"No cancellation happened out of {NumberOfRuns} runs!");
}
[Theory(Skip = "Identify is too fast, doesn't work reliably.")]

7
tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs

@ -404,16 +404,15 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
[Theory]
[WithFile(TestImages.Png.Splash, PixelTypes.Rgba32)]
[WithFile(TestImages.Png.Bike, PixelTypes.Rgba32)]
public void PngDecoder_CanDecode_WithLimitedAllocatorBufferCapacity<TPixel>(TestImageProvider<TPixel> provider)
where TPixel : unmanaged, IPixel<TPixel>
public void PngDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider<Rgba32> provider)
{
static void RunTest(string providerDump, string nonContiguousBuffersStr)
{
TestImageProvider<TPixel> provider = BasicSerializer.Deserialize<TestImageProvider<TPixel>>(providerDump);
TestImageProvider<Rgba32> provider = BasicSerializer.Deserialize<TestImageProvider<Rgba32>>(providerDump);
provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100);
using Image<TPixel> image = provider.GetImage(PngDecoder);
using Image<Rgba32> image = provider.GetImage(PngDecoder);
image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr);
image.CompareToOriginal(provider);
}

42
tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs

@ -2,13 +2,8 @@
// Licensed under the Apache License, Version 2.0.
// ReSharper disable InconsistentNaming
using System.Diagnostics;
using System.IO;
using System.Linq;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics.X86;
#endif
using Microsoft.DotNet.RemoteExecutor;
using SixLabors.ImageSharp.Formats;
using SixLabors.ImageSharp.Formats.Png;
using SixLabors.ImageSharp.Metadata;
@ -16,7 +11,6 @@ using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing.Processors.Quantization;
using SixLabors.ImageSharp.Tests.TestUtilities;
using SixLabors.ImageSharp.Tests.TestUtilities.ImageComparison;
using Xunit;
namespace SixLabors.ImageSharp.Tests.Formats.Png
@ -536,16 +530,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
[Theory]
[WithTestPatternImages(100, 100, PixelTypes.Rgba32)]
public void EncodeWorksWithoutSsse3Intrinsics<TPixel>(TestImageProvider<TPixel> provider)
where TPixel : unmanaged, IPixel<TPixel>
public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider<Rgba32> provider)
{
static void RunTest(string providerDump)
static void RunTest(string serialized)
{
TestImageProvider<TPixel> provider =
BasicSerializer.Deserialize<TestImageProvider<TPixel>>(providerDump);
#if SUPPORTS_RUNTIME_INTRINSICS
Assert.False(Ssse3.IsSupported);
#endif
TestImageProvider<Rgba32> provider =
FeatureTestRunner.DeserializeForXunit<TestImageProvider<Rgba32>>(serialized);
foreach (PngInterlaceMode interlaceMode in InterlaceMode)
{
@ -560,19 +550,21 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
}
}
string providerDump = BasicSerializer.Serialize(provider);
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
HwIntrinsics.DisableSSSE3,
provider);
}
var processStartInfo = new ProcessStartInfo();
processStartInfo.Environment[TestEnvironment.Features.EnableSSE3] = TestEnvironment.Features.Off;
[Fact]
public void EncodeFixesInvalidOptions()
{
// https://github.com/SixLabors/ImageSharp/issues/935
using var ms = new MemoryStream();
var testFile = TestFile.Create(TestImages.Png.Issue935);
using Image<Rgba32> image = testFile.CreateRgba32Image(new PngDecoder());
RemoteExecutor.Invoke(
RunTest,
providerDump,
new RemoteInvokeOptions
{
StartInfo = processStartInfo
})
.Dispose();
image.Save(ms, new PngEncoder { ColorType = PngColorType.RgbWithAlpha });
}
private static void TestPngEncoderCore<TPixel>(

7
tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs

@ -747,16 +747,15 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga
[Theory]
[WithFile(Bit24BottomLeft, PixelTypes.Rgba32)]
[WithFile(Bit32BottomLeft, PixelTypes.Rgba32)]
public void TgaDecoder_CanDecode_WithLimitedAllocatorBufferCapacity<TPixel>(TestImageProvider<TPixel> provider)
where TPixel : unmanaged, IPixel<TPixel>
public void TgaDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider<Rgba32> provider)
{
static void RunTest(string providerDump, string nonContiguousBuffersStr)
{
TestImageProvider<TPixel> provider = BasicSerializer.Deserialize<TestImageProvider<TPixel>>(providerDump);
TestImageProvider<Rgba32> provider = BasicSerializer.Deserialize<TestImageProvider<Rgba32>>(providerDump);
provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100);
using Image<TPixel> image = provider.GetImage(TgaDecoder);
using Image<Rgba32> image = provider.GetImage(TgaDecoder);
image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr);
if (TestEnvironment.IsWindows)

6
tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs

@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga
Image<TPixel> image,
bool useExactComparer = true,
float compareTolerance = 0.01f)
where TPixel : unmanaged, IPixel<TPixel>
where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel<TPixel>
{
string path = TestImageProvider<TPixel>.GetFilePathOrNull(provider);
if (path == null)
@ -39,7 +39,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga
}
public static Image<TPixel> DecodeWithMagick<TPixel>(Configuration configuration, FileInfo fileInfo)
where TPixel : unmanaged, IPixel<TPixel>
where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel<TPixel>
{
using (var magickImage = new MagickImage(fileInfo))
{
@ -48,7 +48,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga
Assert.True(result.TryGetSinglePixelSpan(out Span<TPixel> resultPixels));
using (IPixelCollection pixels = magickImage.GetPixelsUnsafe())
using (IUnsafePixelCollection<ushort> pixels = magickImage.GetPixelsUnsafe())
{
byte[] data = pixels.ToByteArray(PixelMapping.RGBA);

15
tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs

@ -10,6 +10,21 @@ namespace SixLabors.ImageSharp.Tests.Helpers
{
public class ImageMathsTests
{
[Theory]
[InlineData(0)]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
[InlineData(4)]
[InlineData(100)]
[InlineData(123)]
[InlineData(53436353)]
public void Modulo2(int x)
{
int actual = ImageMaths.Modulo2(x);
Assert.Equal(x % 2, actual);
}
[Theory]
[InlineData(0)]
[InlineData(1)]

2
tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs

@ -17,6 +17,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers
[InlineData(0)]
[InlineData(1)]
[InlineData(30)]
[InlineData(63)]
public void Premultiply_VectorSpan(int length)
{
var rnd = new Random(42);
@ -36,6 +37,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers
[InlineData(0)]
[InlineData(1)]
[InlineData(30)]
[InlineData(63)]
public void UnPremultiply_VectorSpan(int length)
{
var rnd = new Random(42);

1
tests/ImageSharp.Tests/ImageSharp.Tests.csproj

@ -26,6 +26,7 @@
<ItemGroup>
<PackageReference Include="Magick.NET-Q16-AnyCPU" />
<PackageReference Include="Microsoft.DotNet.RemoteExecutor" />
<PackageReference Include="Microsoft.DotNet.XUnitExtensions" />
<PackageReference Include="Moq" />
<PackageReference Include="SharpZipLib" />
<PackageReference Include="System.Drawing.Common" />

60
tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs

@ -13,34 +13,49 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats
{
public static class ReferenceImplementations
{
public static Rgba32 MakeRgba32(byte r, byte g, byte b, byte a)
public static byte[] MakeRgba32ByteArray(byte r, byte g, byte b, byte a)
{
Rgba32 d = default;
d.R = r;
d.G = g;
d.B = b;
d.A = a;
return d;
var buffer = new byte[256];
for (int i = 0; i < buffer.Length; i += 4)
{
buffer[i] = r;
buffer[i + 1] = g;
buffer[i + 2] = b;
buffer[i + 3] = a;
}
return buffer;
}
public static Argb32 MakeArgb32(byte r, byte g, byte b, byte a)
public static byte[] MakeArgb32ByteArray(byte r, byte g, byte b, byte a)
{
Argb32 d = default;
d.R = r;
d.G = g;
d.B = b;
d.A = a;
return d;
var buffer = new byte[256];
for (int i = 0; i < buffer.Length; i += 4)
{
buffer[i] = a;
buffer[i + 1] = r;
buffer[i + 2] = g;
buffer[i + 3] = b;
}
return buffer;
}
public static Bgra32 MakeBgra32(byte r, byte g, byte b, byte a)
public static byte[] MakeBgra32ByteArray(byte r, byte g, byte b, byte a)
{
Bgra32 d = default;
d.R = r;
d.G = g;
d.B = b;
d.A = a;
return d;
var buffer = new byte[256];
for (int i = 0; i < buffer.Length; i += 4)
{
buffer[i] = b;
buffer[i + 1] = g;
buffer[i + 2] = r;
buffer[i + 3] = a;
}
return buffer;
}
internal static void To<TSourcePixel, TDestinationPixel>(
@ -83,8 +98,7 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats
if (typeof(TDestinationPixel) == typeof(L8))
{
ref L8 l8Ref = ref MemoryMarshal.GetReference(
MemoryMarshal.Cast<TDestinationPixel, L8>(destinationPixels));
ref L8 l8Ref = ref MemoryMarshal.GetReference(MemoryMarshal.Cast<TDestinationPixel, L8>(destinationPixels));
for (int i = 0; i < count; i++)
{
ref TSourcePixel sp = ref Unsafe.Add(ref sourceRef, i);

67
tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs

@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.PixelFormats.Utils;
@ -33,30 +34,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats
[MemberData(nameof(RgbaData))]
public void ToArgb32(byte r, byte g, byte b, byte a)
{
Rgba32 s = ReferenceImplementations.MakeRgba32(r, g, b, a);
byte[] source = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a);
var actual = new byte[source.Length];
// Act:
uint actualPacked = PixelConverter.FromRgba32.ToArgb32(s.PackedValue);
PixelConverter.FromRgba32.ToArgb32(source, actual);
// Assert:
uint expectedPacked = ReferenceImplementations.MakeArgb32(r, g, b, a).PackedValue;
byte[] expected = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a);
Assert.Equal(expectedPacked, actualPacked);
Assert.Equal(expected, actual);
}
[Theory]
[MemberData(nameof(RgbaData))]
public void ToBgra32(byte r, byte g, byte b, byte a)
{
Rgba32 s = ReferenceImplementations.MakeRgba32(r, g, b, a);
byte[] source = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a);
var actual = new byte[source.Length];
// Act:
uint actualPacked = PixelConverter.FromRgba32.ToBgra32(s.PackedValue);
PixelConverter.FromRgba32.ToBgra32(source, actual);
// Assert:
uint expectedPacked = ReferenceImplementations.MakeBgra32(r, g, b, a).PackedValue;
byte[] expected = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a);
Assert.Equal(expectedPacked, actualPacked);
Assert.Equal(expected, actual);
}
}
@ -66,30 +65,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats
[MemberData(nameof(RgbaData))]
public void ToRgba32(byte r, byte g, byte b, byte a)
{
Argb32 s = ReferenceImplementations.MakeArgb32(r, g, b, a);
byte[] source = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a);
var actual = new byte[source.Length];
// Act:
uint actualPacked = PixelConverter.FromArgb32.ToRgba32(s.PackedValue);
PixelConverter.FromArgb32.ToRgba32(source, actual);
// Assert:
uint expectedPacked = ReferenceImplementations.MakeRgba32(r, g, b, a).PackedValue;
byte[] expected = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a);
Assert.Equal(expectedPacked, actualPacked);
Assert.Equal(expected, actual);
}
[Theory]
[MemberData(nameof(RgbaData))]
public void ToBgra32(byte r, byte g, byte b, byte a)
{
Argb32 s = ReferenceImplementations.MakeArgb32(r, g, b, a);
byte[] source = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a);
var actual = new byte[source.Length];
// Act:
uint actualPacked = PixelConverter.FromArgb32.ToBgra32(s.PackedValue);
PixelConverter.FromArgb32.ToBgra32(source, actual);
// Assert:
uint expectedPacked = ReferenceImplementations.MakeBgra32(r, g, b, a).PackedValue;
byte[] expected = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a);
Assert.Equal(expectedPacked, actualPacked);
Assert.Equal(expected, actual);
}
}
@ -99,30 +96,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats
[MemberData(nameof(RgbaData))]
public void ToArgb32(byte r, byte g, byte b, byte a)
{
Bgra32 s = ReferenceImplementations.MakeBgra32(r, g, b, a);
byte[] source = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a);
var actual = new byte[source.Length];
// Act:
uint actualPacked = PixelConverter.FromBgra32.ToArgb32(s.PackedValue);
PixelConverter.FromBgra32.ToArgb32(source, actual);
// Assert:
uint expectedPacked = ReferenceImplementations.MakeArgb32(r, g, b, a).PackedValue;
byte[] expected = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a);
Assert.Equal(expectedPacked, actualPacked);
Assert.Equal(expected, actual);
}
[Theory]
[MemberData(nameof(RgbaData))]
public void ToRgba32(byte r, byte g, byte b, byte a)
{
Bgra32 s = ReferenceImplementations.MakeBgra32(r, g, b, a);
byte[] source = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a);
var actual = new byte[source.Length];
// Act:
uint actualPacked = PixelConverter.FromBgra32.ToRgba32(s.PackedValue);
PixelConverter.FromBgra32.ToRgba32(source, actual);
// Assert:
uint expectedPacked = ReferenceImplementations.MakeRgba32(r, g, b, a).PackedValue;
byte[] expected = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a);
Assert.Equal(expectedPacked, actualPacked);
Assert.Equal(expected, actual);
}
}
}

1
tests/ImageSharp.Tests/Processing/Binarization/AdaptiveThresholdTests.cs

@ -100,6 +100,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Binarization
[Theory]
[WithFile(TestImages.Png.Bradley01, PixelTypes.Rgba32)]
[WithFile(TestImages.Png.Bradley02, PixelTypes.Rgba32)]
[WithFile(TestImages.Png.Ducky, PixelTypes.Rgba32)]
public void AdaptiveThreshold_Works<TPixel>(TestImageProvider<TPixel> provider)
where TPixel : unmanaged, IPixel<TPixel>
{

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save