Browse Source

Merge pull request #1143 from SixLabors/af/block-scale-optimization

Undo jpeg perf regression, add various optimizations
pull/1574/head
Anton Firszov 6 years ago
committed by GitHub
parent
commit
162884092c
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 103
      src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs
  2. 28
      src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
  3. 20
      src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
  4. 22
      src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs
  5. 33
      src/ImageSharp/Common/Helpers/SimdUtils.cs
  6. 6
      src/ImageSharp/Common/Tuples/Vector4Pair.cs
  7. 2
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs
  8. 2
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt
  9. 35
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopyTo.cs
  10. 22
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
  11. 16
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs
  12. 8
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs
  13. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
  14. 14
      src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
  15. 19
      src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegComponentPostProcessor.cs
  16. 9
      src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs
  17. 8
      src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.cs
  18. 46
      src/ImageSharp/Memory/Buffer2DExtensions.cs
  19. 37
      src/ImageSharp/Memory/Buffer2D{T}.cs
  20. 18
      src/ImageSharp/Memory/BufferArea{T}.cs
  21. 4
      src/ImageSharp/PixelFormats/PixelImplementations/Rgba32.PixelOperations.cs
  22. 6
      src/ImageSharp/PixelFormats/Utils/Vector4Converters.RgbaCompatible.cs
  23. 18
      src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs
  24. 303
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_CopyTo1x1.cs
  25. 442
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Round.cs
  26. 1
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg_ImageSpecific.cs
  27. 8
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs
  28. 126
      tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs
  29. 6
      tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs
  30. 8
      tests/ImageSharp.Benchmarks/Config.cs
  31. 14
      tests/ImageSharp.Tests.ProfilingSandbox/Program.cs
  32. 38
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
  33. 4
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.CopyToBufferArea.cs
  34. 14
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
  35. 10
      tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
  36. 4
      tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs
  37. 4
      tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.LLM_FloatingPoint_DCT.cs
  38. 23
      tests/ImageSharp.Tests/Processing/Processors/Convolution/DetectEdgesTest.cs
  39. 13
      tests/ImageSharp.Tests/Processing/Processors/Effects/OilPaintTest.cs
  40. 2
      tests/ImageSharp.Tests/Processing/Processors/Quantization/QuantizerTests.cs
  41. 12
      tests/ImageSharp.Tests/Processing/Processors/Transforms/EntropyCropTest.cs
  42. 31
      tests/ImageSharp.Tests/ProfilingBenchmarks/JpegProfilingBenchmarks.cs

103
src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs

@ -0,0 +1,103 @@
// Copyright (c) Six Labors and contributors.
// Licensed under the Apache License, Version 2.0.
#if SUPPORTS_RUNTIME_INTRINSICS
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace SixLabors.ImageSharp
{
internal static partial class SimdUtils
{
public static class Avx2Intrinsics
{
private static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 };
/// <summary>
/// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturateReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
if (Avx2.IsSupported)
{
int remainder = ImageMaths.ModuloP2(source.Length, Vector<byte>.Count);
int adjustedCount = source.Length - remainder;
if (adjustedCount > 0)
{
NormalizedFloatToByteSaturate(
source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount));
source = source.Slice(adjustedCount);
dest = dest.Slice(adjustedCount);
}
}
}
/// <summary>
/// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
/// </summary>
/// <remarks>
/// Implementation is based on MagicScaler code:
/// https://github.com/saucecontrol/PhotoSauce/blob/a9bd6e5162d2160419f0cf743fd4f536c079170b/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L453-L477
/// </remarks>
internal static void NormalizedFloatToByteSaturate(
ReadOnlySpan<float> source,
Span<byte> dest)
{
VerifySpanInput(source, dest, Vector256<byte>.Count);
int n = dest.Length / Vector256<byte>.Count;
ref Vector256<float> sourceBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
ref Vector256<byte> destBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
var maxBytes = Vector256.Create(255f);
ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32);
Vector256<int> mask = Unsafe.As<byte, Vector256<int>>(ref maskBase);
for (int i = 0; i < n; i++)
{
ref Vector256<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
Vector256<float> f0 = s;
Vector256<float> f1 = Unsafe.Add(ref s, 1);
Vector256<float> f2 = Unsafe.Add(ref s, 2);
Vector256<float> f3 = Unsafe.Add(ref s, 3);
Vector256<int> w0 = ConvertToInt32(f0, maxBytes);
Vector256<int> w1 = ConvertToInt32(f1, maxBytes);
Vector256<int> w2 = ConvertToInt32(f2, maxBytes);
Vector256<int> w3 = ConvertToInt32(f3, maxBytes);
Vector256<short> u0 = Avx2.PackSignedSaturate(w0, w1);
Vector256<short> u1 = Avx2.PackSignedSaturate(w2, w3);
Vector256<byte> b = Avx2.PackUnsignedSaturate(u0, u1);
b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte();
Unsafe.Add(ref destBase, i) = b;
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector256<int> ConvertToInt32(Vector256<float> vf, Vector256<float> scale)
{
vf = Avx.Multiply(vf, scale);
return Avx.ConvertToVector256Int32(vf);
}
}
}
}
#endif

28
src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs

@ -17,14 +17,14 @@ namespace SixLabors.ImageSharp
/// </summary> /// </summary>
public static class BasicIntrinsics256 public static class BasicIntrinsics256
{ {
public static bool IsAvailable { get; } = IsAvx2CompatibleArchitecture; public static bool IsAvailable { get; } = HasVector8;
#if !SUPPORTS_EXTENDED_INTRINSICS #if !SUPPORTS_EXTENDED_INTRINSICS
/// <summary> /// <summary>
/// <see cref="BulkConvertByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder). /// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloatReduce( internal static void ByteToNormalizedFloatReduce(
ref ReadOnlySpan<byte> source, ref ReadOnlySpan<byte> source,
ref Span<float> dest) ref Span<float> dest)
{ {
@ -40,7 +40,7 @@ namespace SixLabors.ImageSharp
if (adjustedCount > 0) if (adjustedCount > 0)
{ {
BulkConvertByteToNormalizedFloat( ByteToNormalizedFloat(
source.Slice(0, adjustedCount), source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount)); dest.Slice(0, adjustedCount));
@ -50,10 +50,10 @@ namespace SixLabors.ImageSharp
} }
/// <summary> /// <summary>
/// <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> as many elements as possible, slicing them down (keeping the remainder). /// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce( internal static void NormalizedFloatToByteSaturateReduce(
ref ReadOnlySpan<float> source, ref ReadOnlySpan<float> source,
ref Span<byte> dest) ref Span<byte> dest)
{ {
@ -69,7 +69,7 @@ namespace SixLabors.ImageSharp
if (adjustedCount > 0) if (adjustedCount > 0)
{ {
BulkConvertNormalizedFloatToByteClampOverflows(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount)); NormalizedFloatToByteSaturate(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount));
source = source.Slice(adjustedCount); source = source.Slice(adjustedCount);
dest = dest.Slice(adjustedCount); dest = dest.Slice(adjustedCount);
@ -78,15 +78,15 @@ namespace SixLabors.ImageSharp
#endif #endif
/// <summary> /// <summary>
/// SIMD optimized implementation for <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/>. /// SIMD optimized implementation for <see cref="SimdUtils.ByteToNormalizedFloat"/>.
/// Works only with span Length divisible by 8. /// Works only with span Length divisible by 8.
/// Implementation adapted from: /// Implementation adapted from:
/// http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions /// http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions
/// http://stackoverflow.com/a/536278 /// http://stackoverflow.com/a/536278
/// </summary> /// </summary>
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest) internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{ {
VerifyIsAvx2Compatible(nameof(BulkConvertByteToNormalizedFloat)); VerifyHasVector8(nameof(ByteToNormalizedFloat));
VerifySpanInput(source, dest, 8); VerifySpanInput(source, dest, 8);
var bVec = new Vector<float>(256.0f / 255.0f); var bVec = new Vector<float>(256.0f / 255.0f);
@ -124,11 +124,11 @@ namespace SixLabors.ImageSharp
} }
/// <summary> /// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> which is faster on older runtimes. /// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/> which is faster on older runtimes.
/// </summary> /// </summary>
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest) internal static void NormalizedFloatToByteSaturate(ReadOnlySpan<float> source, Span<byte> dest)
{ {
VerifyIsAvx2Compatible(nameof(BulkConvertNormalizedFloatToByteClampOverflows)); VerifyHasVector8(nameof(NormalizedFloatToByteSaturate));
VerifySpanInput(source, dest, 8); VerifySpanInput(source, dest, 8);
if (source.Length == 0) if (source.Length == 0)
@ -177,7 +177,7 @@ namespace SixLabors.ImageSharp
/// </summary> /// </summary>
internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan<float> source, Span<byte> dest) internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan<float> source, Span<byte> dest)
{ {
VerifyIsAvx2Compatible(nameof(BulkConvertNormalizedFloatToByte)); VerifyHasVector8(nameof(BulkConvertNormalizedFloatToByte));
VerifySpanInput(source, dest, 8); VerifySpanInput(source, dest, 8);
if (source.Length == 0) if (source.Length == 0)

20
src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs

@ -43,10 +43,10 @@ namespace SixLabors.ImageSharp
} }
/// <summary> /// <summary>
/// <see cref="BulkConvertByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder). /// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloatReduce( internal static void ByteToNormalizedFloatReduce(
ref ReadOnlySpan<byte> source, ref ReadOnlySpan<byte> source,
ref Span<float> dest) ref Span<float> dest)
{ {
@ -62,7 +62,7 @@ namespace SixLabors.ImageSharp
if (adjustedCount > 0) if (adjustedCount > 0)
{ {
BulkConvertByteToNormalizedFloat(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount)); ByteToNormalizedFloat(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount));
source = source.Slice(adjustedCount); source = source.Slice(adjustedCount);
dest = dest.Slice(adjustedCount); dest = dest.Slice(adjustedCount);
@ -70,10 +70,10 @@ namespace SixLabors.ImageSharp
} }
/// <summary> /// <summary>
/// <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> as many elements as possible, slicing them down (keeping the remainder). /// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce( internal static void NormalizedFloatToByteSaturateReduce(
ref ReadOnlySpan<float> source, ref ReadOnlySpan<float> source,
ref Span<byte> dest) ref Span<byte> dest)
{ {
@ -89,7 +89,7 @@ namespace SixLabors.ImageSharp
if (adjustedCount > 0) if (adjustedCount > 0)
{ {
BulkConvertNormalizedFloatToByteClampOverflows( NormalizedFloatToByteSaturate(
source.Slice(0, adjustedCount), source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount)); dest.Slice(0, adjustedCount));
@ -99,9 +99,9 @@ namespace SixLabors.ImageSharp
} }
/// <summary> /// <summary>
/// Implementation <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime. /// Implementation <see cref="SimdUtils.ByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
/// </summary> /// </summary>
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest) internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{ {
VerifySpanInput(source, dest, Vector<byte>.Count); VerifySpanInput(source, dest, Vector<byte>.Count);
@ -132,9 +132,9 @@ namespace SixLabors.ImageSharp
} }
/// <summary> /// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/>, which is faster on new .NET runtime. /// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
/// </summary> /// </summary>
internal static void BulkConvertNormalizedFloatToByteClampOverflows( internal static void NormalizedFloatToByteSaturate(
ReadOnlySpan<float> source, ReadOnlySpan<float> source,
Span<byte> dest) Span<byte> dest)
{ {

22
src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs

@ -19,10 +19,10 @@ namespace SixLabors.ImageSharp
public static class FallbackIntrinsics128 public static class FallbackIntrinsics128
{ {
/// <summary> /// <summary>
/// <see cref="BulkConvertByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder). /// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloatReduce( internal static void ByteToNormalizedFloatReduce(
ref ReadOnlySpan<byte> source, ref ReadOnlySpan<byte> source,
ref Span<float> dest) ref Span<float> dest)
{ {
@ -33,7 +33,7 @@ namespace SixLabors.ImageSharp
if (adjustedCount > 0) if (adjustedCount > 0)
{ {
BulkConvertByteToNormalizedFloat( ByteToNormalizedFloat(
source.Slice(0, adjustedCount), source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount)); dest.Slice(0, adjustedCount));
@ -43,10 +43,10 @@ namespace SixLabors.ImageSharp
} }
/// <summary> /// <summary>
/// <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> as many elements as possible, slicing them down (keeping the remainder). /// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce( internal static void NormalizedFloatToByteSaturateReduce(
ref ReadOnlySpan<float> source, ref ReadOnlySpan<float> source,
ref Span<byte> dest) ref Span<byte> dest)
{ {
@ -57,7 +57,7 @@ namespace SixLabors.ImageSharp
if (adjustedCount > 0) if (adjustedCount > 0)
{ {
BulkConvertNormalizedFloatToByteClampOverflows( NormalizedFloatToByteSaturate(
source.Slice(0, adjustedCount), source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount)); dest.Slice(0, adjustedCount));
@ -67,10 +67,10 @@ namespace SixLabors.ImageSharp
} }
/// <summary> /// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/> using <see cref="Vector4"/>. /// Implementation of <see cref="SimdUtils.ByteToNormalizedFloat"/> using <see cref="Vector4"/>.
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ColdPath)] [MethodImpl(InliningOptions.ColdPath)]
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest) internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{ {
VerifySpanInput(source, dest, 4); VerifySpanInput(source, dest, 4);
@ -99,10 +99,10 @@ namespace SixLabors.ImageSharp
} }
/// <summary> /// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> using <see cref="Vector4"/>. /// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/> using <see cref="Vector4"/>.
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ColdPath)] [MethodImpl(InliningOptions.ColdPath)]
internal static void BulkConvertNormalizedFloatToByteClampOverflows( internal static void NormalizedFloatToByteSaturate(
ReadOnlySpan<float> source, ReadOnlySpan<float> source,
Span<byte> dest) Span<byte> dest)
{ {
@ -148,4 +148,4 @@ namespace SixLabors.ImageSharp
} }
} }
} }
} }

33
src/ImageSharp/Common/Helpers/SimdUtils.cs

@ -15,9 +15,10 @@ namespace SixLabors.ImageSharp
internal static partial class SimdUtils internal static partial class SimdUtils
{ {
/// <summary> /// <summary>
/// Gets a value indicating whether the code is being executed on AVX2 CPU where both float and integer registers are of size 256 byte. /// Gets a value indicating whether <see cref="Vector{T}"/> code is being JIT-ed to AVX2 instructions
/// where both float and integer registers are of size 256 byte.
/// </summary> /// </summary>
public static bool IsAvx2CompatibleArchitecture { get; } = public static bool HasVector8 { get; } =
Vector.IsHardwareAccelerated && Vector<float>.Count == 8 && Vector<int>.Count == 8; Vector.IsHardwareAccelerated && Vector<float>.Count == 8 && Vector<int>.Count == 8;
/// <summary> /// <summary>
@ -60,16 +61,18 @@ namespace SixLabors.ImageSharp
/// <param name="source">The source span of bytes</param> /// <param name="source">The source span of bytes</param>
/// <param name="dest">The destination span of floats</param> /// <param name="dest">The destination span of floats</param>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest) internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{ {
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
#if SUPPORTS_EXTENDED_INTRINSICS #if SUPPORTS_EXTENDED_INTRINSICS
ExtendedIntrinsics.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest); ExtendedIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest);
#else #else
BasicIntrinsics256.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest); BasicIntrinsics256.ByteToNormalizedFloatReduce(ref source, ref dest);
#endif #endif
FallbackIntrinsics128.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
// Also deals with the remainder from previous conversions:
FallbackIntrinsics128.ByteToNormalizedFloatReduce(ref source, ref dest);
// Deal with the remainder: // Deal with the remainder:
if (source.Length > 0) if (source.Length > 0)
@ -87,16 +90,20 @@ namespace SixLabors.ImageSharp
/// <param name="source">The source span of floats</param> /// <param name="source">The source span of floats</param>
/// <param name="dest">The destination span of bytes</param> /// <param name="dest">The destination span of bytes</param>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest) internal static void NormalizedFloatToByteSaturate(ReadOnlySpan<float> source, Span<byte> dest)
{ {
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
#if SUPPORTS_EXTENDED_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest); Avx2Intrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest);
#elif SUPPORTS_EXTENDED_INTRINSICS
ExtendedIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest);
#else #else
BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest); BasicIntrinsics256.NormalizedFloatToByteSaturateReduce(ref source, ref dest);
#endif #endif
FallbackIntrinsics128.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);
// Also deals with the remainder from previous conversions:
FallbackIntrinsics128.NormalizedFloatToByteSaturateReduce(ref source, ref dest);
// Deal with the remainder: // Deal with the remainder:
if (source.Length > 0) if (source.Length > 0)
@ -151,9 +158,9 @@ namespace SixLabors.ImageSharp
private static byte ConvertToByte(float f) => (byte)ComparableExtensions.Clamp((f * 255f) + 0.5f, 0, 255f); private static byte ConvertToByte(float f) => (byte)ComparableExtensions.Clamp((f * 255f) + 0.5f, 0, 255f);
[Conditional("DEBUG")] [Conditional("DEBUG")]
private static void VerifyIsAvx2Compatible(string operation) private static void VerifyHasVector8(string operation)
{ {
if (!IsAvx2CompatibleArchitecture) if (!HasVector8)
{ {
throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!"); throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!");
} }

6
src/ImageSharp/Common/Tuples/Vector4Pair.cs

@ -44,7 +44,7 @@ namespace SixLabors.ImageSharp.Tuples
/// Downscale method, specific to Jpeg color conversion. Works only if Vector{float}.Count == 4! /// TODO: Move it somewhere else. /// Downscale method, specific to Jpeg color conversion. Works only if Vector{float}.Count == 4! /// TODO: Move it somewhere else.
/// </summary> /// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void RoundAndDownscalePreAvx2(float downscaleFactor) internal void RoundAndDownscalePreVector8(float downscaleFactor)
{ {
ref Vector<float> a = ref Unsafe.As<Vector4, Vector<float>>(ref this.A); ref Vector<float> a = ref Unsafe.As<Vector4, Vector<float>>(ref this.A);
a = a.FastRound(); a = a.FastRound();
@ -63,7 +63,7 @@ namespace SixLabors.ImageSharp.Tuples
/// TODO: Move it somewhere else. /// TODO: Move it somewhere else.
/// </summary> /// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void RoundAndDownscaleAvx2(float downscaleFactor) internal void RoundAndDownscaleVector8(float downscaleFactor)
{ {
ref Vector<float> self = ref Unsafe.As<Vector4Pair, Vector<float>>(ref this); ref Vector<float> self = ref Unsafe.As<Vector4Pair, Vector<float>>(ref this);
Vector<float> v = self; Vector<float> v = self;
@ -79,4 +79,4 @@ namespace SixLabors.ImageSharp.Tuples
return $"{nameof(Vector4Pair)}({this.A}, {this.B})"; return $"{nameof(Vector4Pair)}({this.A}, {this.B})";
} }
} }
} }

2
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs

@ -121,7 +121,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// AVX2-only variant for executing <see cref="NormalizeColorsInplace"/> and <see cref="RoundInplace"/> in one step. /// AVX2-only variant for executing <see cref="NormalizeColorsInplace"/> and <see cref="RoundInplace"/> in one step.
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public void NormalizeColorsAndRoundInplaceAvx2(float maximum) public void NormalizeColorsAndRoundInplaceVector8(float maximum)
{ {
var off = new Vector<float>(MathF.Ceiling(maximum / 2)); var off = new Vector<float>(MathF.Ceiling(maximum / 2));
var max = new Vector<float>(maximum); var max = new Vector<float>(maximum);

2
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt

@ -84,7 +84,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// AVX2-only variant for executing <see cref="NormalizeColorsInplace"/> and <see cref="RoundInplace"/> in one step. /// AVX2-only variant for executing <see cref="NormalizeColorsInplace"/> and <see cref="RoundInplace"/> in one step.
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public void NormalizeColorsAndRoundInplaceAvx2(float maximum) public void NormalizeColorsAndRoundInplaceVector8(float maximum)
{ {
var off = new Vector<float>(MathF.Ceiling(maximum / 2)); var off = new Vector<float>(MathF.Ceiling(maximum / 2));
var max = new Vector<float>(maximum); var max = new Vector<float>(maximum);

35
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.CopyTo.cs → src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopyTo.cs

@ -15,29 +15,36 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// Copy block data into the destination color buffer pixel area with the provided horizontal and vertical scale factors. /// Copy block data into the destination color buffer pixel area with the provided horizontal and vertical scale factors.
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public void CopyTo(in BufferArea<float> area, int horizontalScale, int verticalScale) public void ScaledCopyTo(in BufferArea<float> area, int horizontalScale, int verticalScale)
{
ref float areaOrigin = ref area.GetReferenceToOrigin();
this.ScaledCopyTo(ref areaOrigin, area.Stride, horizontalScale, verticalScale);
}
[MethodImpl(InliningOptions.ShortMethod)]
public void ScaledCopyTo(ref float areaOrigin, int areaStride, int horizontalScale, int verticalScale)
{ {
if (horizontalScale == 1 && verticalScale == 1) if (horizontalScale == 1 && verticalScale == 1)
{ {
this.Copy1x1Scale(area); this.Copy1x1Scale(ref areaOrigin, areaStride);
return; return;
} }
if (horizontalScale == 2 && verticalScale == 2) if (horizontalScale == 2 && verticalScale == 2)
{ {
this.Copy2x2Scale(area); this.Copy2x2Scale(ref areaOrigin, areaStride);
return; return;
} }
// TODO: Optimize: implement all cases with scale-specific, loopless code! // TODO: Optimize: implement all cases with scale-specific, loopless code!
this.CopyArbitraryScale(area, horizontalScale, verticalScale); this.CopyArbitraryScale(ref areaOrigin, areaStride, horizontalScale, verticalScale);
} }
public void Copy1x1Scale(in BufferArea<float> destination) public void Copy1x1Scale(ref float areaOrigin, int areaStride)
{ {
ref byte selfBase = ref Unsafe.As<Block8x8F, byte>(ref this); ref byte selfBase = ref Unsafe.As<Block8x8F, byte>(ref this);
ref byte destBase = ref Unsafe.As<float, byte>(ref destination.GetReferenceToOrigin()); ref byte destBase = ref Unsafe.As<float, byte>(ref areaOrigin);
int destStride = destination.Stride * sizeof(float); int destStride = areaStride * sizeof(float);
CopyRowImpl(ref selfBase, ref destBase, destStride, 0); CopyRowImpl(ref selfBase, ref destBase, destStride, 0);
CopyRowImpl(ref selfBase, ref destBase, destStride, 1); CopyRowImpl(ref selfBase, ref destBase, destStride, 1);
@ -57,10 +64,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
Unsafe.CopyBlock(ref d, ref s, 8 * sizeof(float)); Unsafe.CopyBlock(ref d, ref s, 8 * sizeof(float));
} }
private void Copy2x2Scale(in BufferArea<float> area) private void Copy2x2Scale(ref float areaOrigin, int areaStride)
{ {
ref Vector2 destBase = ref Unsafe.As<float, Vector2>(ref area.GetReferenceToOrigin()); ref Vector2 destBase = ref Unsafe.As<float, Vector2>(ref areaOrigin);
int destStride = area.Stride / 2; int destStride = areaStride / 2;
this.WidenCopyRowImpl2x2(ref destBase, 0, destStride); this.WidenCopyRowImpl2x2(ref destBase, 0, destStride);
this.WidenCopyRowImpl2x2(ref destBase, 1, destStride); this.WidenCopyRowImpl2x2(ref destBase, 1, destStride);
@ -110,10 +117,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
} }
[MethodImpl(InliningOptions.ColdPath)] [MethodImpl(InliningOptions.ColdPath)]
private void CopyArbitraryScale(BufferArea<float> area, int horizontalScale, int verticalScale) private void CopyArbitraryScale(ref float areaOrigin, int areaStride, int horizontalScale, int verticalScale)
{ {
ref float destBase = ref area.GetReferenceToOrigin();
for (int y = 0; y < 8; y++) for (int y = 0; y < 8; y++)
{ {
int yy = y * verticalScale; int yy = y * verticalScale;
@ -127,12 +132,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
for (int i = 0; i < verticalScale; i++) for (int i = 0; i < verticalScale; i++)
{ {
int baseIdx = ((yy + i) * area.Stride) + xx; int baseIdx = ((yy + i) * areaStride) + xx;
for (int j = 0; j < horizontalScale; j++) for (int j = 0; j < horizontalScale; j++)
{ {
// area[xx + j, yy + i] = value; // area[xx + j, yy + i] = value;
Unsafe.Add(ref destBase, baseIdx + j) = value; Unsafe.Add(ref areaOrigin, baseIdx + j) = value;
} }
} }
} }

22
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

@ -201,7 +201,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// </summary> /// </summary>
/// <param name="dest">Destination</param> /// <param name="dest">Destination</param>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public void CopyTo(Span<float> dest) public void ScaledCopyTo(Span<float> dest)
{ {
ref byte d = ref Unsafe.As<float, byte>(ref MemoryMarshal.GetReference(dest)); ref byte d = ref Unsafe.As<float, byte>(ref MemoryMarshal.GetReference(dest));
ref byte s = ref Unsafe.As<Block8x8F, byte>(ref this); ref byte s = ref Unsafe.As<Block8x8F, byte>(ref this);
@ -215,7 +215,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <param name="blockPtr">Pointer to block</param> /// <param name="blockPtr">Pointer to block</param>
/// <param name="dest">Destination</param> /// <param name="dest">Destination</param>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static unsafe void CopyTo(Block8x8F* blockPtr, Span<byte> dest) public static unsafe void ScaledCopyTo(Block8x8F* blockPtr, Span<byte> dest)
{ {
float* fPtr = (float*)blockPtr; float* fPtr = (float*)blockPtr;
for (int i = 0; i < Size; i++) for (int i = 0; i < Size; i++)
@ -231,9 +231,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <param name="blockPtr">The block pointer.</param> /// <param name="blockPtr">The block pointer.</param>
/// <param name="dest">The destination.</param> /// <param name="dest">The destination.</param>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static unsafe void CopyTo(Block8x8F* blockPtr, Span<float> dest) public static unsafe void ScaledCopyTo(Block8x8F* blockPtr, Span<float> dest)
{ {
blockPtr->CopyTo(dest); blockPtr->ScaledCopyTo(dest);
} }
/// <summary> /// <summary>
@ -241,7 +241,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// </summary> /// </summary>
/// <param name="dest">Destination</param> /// <param name="dest">Destination</param>
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public unsafe void CopyTo(float[] dest) public unsafe void ScaledCopyTo(float[] dest)
{ {
fixed (void* ptr = &this.V0L) fixed (void* ptr = &this.V0L)
{ {
@ -253,7 +253,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// Copy raw 32bit floating point data to dest /// Copy raw 32bit floating point data to dest
/// </summary> /// </summary>
/// <param name="dest">Destination</param> /// <param name="dest">Destination</param>
public unsafe void CopyTo(Span<int> dest) public unsafe void ScaledCopyTo(Span<int> dest)
{ {
fixed (Vector4* ptr = &this.V0L) fixed (Vector4* ptr = &this.V0L)
{ {
@ -268,7 +268,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public float[] ToArray() public float[] ToArray()
{ {
var result = new float[Size]; var result = new float[Size];
this.CopyTo(result); this.ScaledCopyTo(result);
return result; return result;
} }
@ -471,9 +471,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// </summary> /// </summary>
public void NormalizeColorsAndRoundInplace(float maximum) public void NormalizeColorsAndRoundInplace(float maximum)
{ {
if (SimdUtils.IsAvx2CompatibleArchitecture) if (SimdUtils.HasVector8)
{ {
this.NormalizeColorsAndRoundInplaceAvx2(maximum); this.NormalizeColorsAndRoundInplaceVector8(maximum);
} }
else else
{ {
@ -497,7 +497,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public void LoadFrom(ref Block8x8 source) public void LoadFrom(ref Block8x8 source)
{ {
#if SUPPORTS_EXTENDED_INTRINSICS #if SUPPORTS_EXTENDED_INTRINSICS
if (SimdUtils.IsAvx2CompatibleArchitecture) if (SimdUtils.HasVector8)
{ {
this.LoadFromInt16ExtendedAvx2(ref source); this.LoadFromInt16ExtendedAvx2(ref source);
return; return;
@ -513,7 +513,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public void LoadFromInt16ExtendedAvx2(ref Block8x8 source) public void LoadFromInt16ExtendedAvx2(ref Block8x8 source)
{ {
DebugGuard.IsTrue( DebugGuard.IsTrue(
SimdUtils.IsAvx2CompatibleArchitecture, SimdUtils.HasVector8,
"LoadFromUInt16ExtendedAvx2 only works on AVX2 compatible architecture!"); "LoadFromUInt16ExtendedAvx2 only works on AVX2 compatible architecture!");
ref Vector<short> sRef = ref Unsafe.As<Block8x8, Vector<short>>(ref source); ref Vector<short> sRef = ref Unsafe.As<Block8x8, Vector<short>>(ref source);

16
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs

@ -90,15 +90,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
if (Vector<float>.Count == 4) if (Vector<float>.Count == 4)
{ {
// TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector<T> is terrible?) // TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector<T> is terrible?)
r.RoundAndDownscalePreAvx2(maxValue); r.RoundAndDownscalePreVector8(maxValue);
g.RoundAndDownscalePreAvx2(maxValue); g.RoundAndDownscalePreVector8(maxValue);
b.RoundAndDownscalePreAvx2(maxValue); b.RoundAndDownscalePreVector8(maxValue);
} }
else if (SimdUtils.IsAvx2CompatibleArchitecture) else if (SimdUtils.HasVector8)
{ {
r.RoundAndDownscaleAvx2(maxValue); r.RoundAndDownscaleVector8(maxValue);
g.RoundAndDownscaleAvx2(maxValue); g.RoundAndDownscaleVector8(maxValue);
b.RoundAndDownscaleAvx2(maxValue); b.RoundAndDownscaleVector8(maxValue);
} }
else else
{ {
@ -114,4 +114,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
} }
} }
} }
} }

8
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs

@ -13,14 +13,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{ {
internal abstract partial class JpegColorConverter internal abstract partial class JpegColorConverter
{ {
internal sealed class FromYCbCrSimdAvx2 : JpegColorConverter internal sealed class FromYCbCrSimdVector8 : JpegColorConverter
{ {
public FromYCbCrSimdAvx2(int precision) public FromYCbCrSimdVector8(int precision)
: base(JpegColorSpace.YCbCr, precision) : base(JpegColorSpace.YCbCr, precision)
{ {
} }
public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.IsAvx2CompatibleArchitecture; public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8;
public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result) public override void ConvertToRgba(in ComponentValues values, Span<Vector4> result)
{ {
@ -107,4 +107,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
} }
} }
} }
} }

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs

@ -93,7 +93,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
/// Returns the <see cref="JpegColorConverter"/> for the YCbCr colorspace that matches the current CPU architecture. /// Returns the <see cref="JpegColorConverter"/> for the YCbCr colorspace that matches the current CPU architecture.
/// </summary> /// </summary>
private static JpegColorConverter GetYCbCrConverter(int precision) => private static JpegColorConverter GetYCbCrConverter(int precision) =>
FromYCbCrSimdAvx2.IsAvailable ? (JpegColorConverter)new FromYCbCrSimdAvx2(precision) : new FromYCbCrSimd(precision); FromYCbCrSimdVector8.IsAvailable ? (JpegColorConverter)new FromYCbCrSimdVector8(precision) : new FromYCbCrSimd(precision);
/// <summary> /// <summary>
/// A stack-only struct to reference the input buffers using <see cref="ReadOnlySpan{T}"/>-s. /// A stack-only struct to reference the input buffers using <see cref="ReadOnlySpan{T}"/>-s.

14
src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs

@ -68,11 +68,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// - Copy the resulting color values into 'destArea' scaling up the block by amount defined in <see cref="subSamplingDivisors"/>. /// - Copy the resulting color values into 'destArea' scaling up the block by amount defined in <see cref="subSamplingDivisors"/>.
/// </summary> /// </summary>
/// <param name="sourceBlock">The source block.</param> /// <param name="sourceBlock">The source block.</param>
/// <param name="destArea">The destination buffer area.</param> /// <param name="destAreaOrigin">Reference to the origin of the destination pixel area.</param>
/// <param name="destAreaStride">The width of the destination pixel buffer.</param>
/// <param name="maximumValue">The maximum value derived from the bitdepth.</param> /// <param name="maximumValue">The maximum value derived from the bitdepth.</param>
public void ProcessBlockColorsInto( public void ProcessBlockColorsInto(
ref Block8x8 sourceBlock, ref Block8x8 sourceBlock,
in BufferArea<float> destArea, ref float destAreaOrigin,
int destAreaStride,
float maximumValue) float maximumValue)
{ {
ref Block8x8F b = ref this.SourceBlock; ref Block8x8F b = ref this.SourceBlock;
@ -88,7 +90,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
// To be "more accurate", we need to emulate this by rounding! // To be "more accurate", we need to emulate this by rounding!
this.WorkspaceBlock1.NormalizeColorsAndRoundInplace(maximumValue); this.WorkspaceBlock1.NormalizeColorsAndRoundInplace(maximumValue);
this.WorkspaceBlock1.CopyTo(destArea, this.subSamplingDivisors.Width, this.subSamplingDivisors.Height); this.WorkspaceBlock1.ScaledCopyTo(
ref destAreaOrigin,
destAreaStride,
this.subSamplingDivisors.Width,
this.subSamplingDivisors.Height);
} }
} }
} }

19
src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegComponentPostProcessor.cs

@ -79,6 +79,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
var blockPp = new JpegBlockPostProcessor(this.ImagePostProcessor.RawJpeg, this.Component); var blockPp = new JpegBlockPostProcessor(this.ImagePostProcessor.RawJpeg, this.Component);
float maximumValue = MathF.Pow(2, this.ImagePostProcessor.RawJpeg.Precision) - 1; float maximumValue = MathF.Pow(2, this.ImagePostProcessor.RawJpeg.Precision) - 1;
int destAreaStride = this.ColorBuffer.Width;
for (int y = 0; y < this.BlockRowsPerStep; y++) for (int y = 0; y < this.BlockRowsPerStep; y++)
{ {
int yBlock = this.currentComponentRowInBlocks + y; int yBlock = this.currentComponentRowInBlocks + y;
@ -90,22 +92,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
int yBuffer = y * this.blockAreaSize.Height; int yBuffer = y * this.blockAreaSize.Height;
Span<float> colorBufferRow = this.ColorBuffer.GetRowSpan(yBuffer);
Span<Block8x8> blockRow = this.Component.SpectralBlocks.GetRowSpan(yBlock); Span<Block8x8> blockRow = this.Component.SpectralBlocks.GetRowSpan(yBlock);
ref Block8x8 blockRowBase = ref MemoryMarshal.GetReference(blockRow); // see: https://github.com/SixLabors/ImageSharp/issues/824
int widthInBlocks = Math.Min(this.Component.SpectralBlocks.Width, this.SizeInBlocks.Width);
for (int xBlock = 0; xBlock < this.SizeInBlocks.Width; xBlock++) for (int xBlock = 0; xBlock < widthInBlocks; xBlock++)
{ {
ref Block8x8 block = ref Unsafe.Add(ref blockRowBase, xBlock); ref Block8x8 block = ref blockRow[xBlock];
int xBuffer = xBlock * this.blockAreaSize.Width; int xBuffer = xBlock * this.blockAreaSize.Width;
ref float destAreaOrigin = ref colorBufferRow[xBuffer];
BufferArea<float> destArea = this.ColorBuffer.GetArea( blockPp.ProcessBlockColorsInto(ref block, ref destAreaOrigin, destAreaStride, maximumValue);
xBuffer,
yBuffer,
this.blockAreaSize.Width,
this.blockAreaSize.Height);
blockPp.ProcessBlockColorsInto(ref block, destArea, maximumValue);
} }
} }

9
src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.Buffer{T}.cs

@ -3,6 +3,7 @@
using System; using System;
using System.Buffers; using System.Buffers;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Memory.Internals; using SixLabors.ImageSharp.Memory.Internals;
@ -50,7 +51,7 @@ namespace SixLabors.ImageSharp.Memory
{ {
if (this.Data is null) if (this.Data is null)
{ {
throw new ObjectDisposedException("ArrayPoolMemoryAllocator.Buffer<T>"); ThrowObjectDisposedException();
} }
return MemoryMarshal.Cast<byte, T>(this.Data.AsSpan()).Slice(0, this.length); return MemoryMarshal.Cast<byte, T>(this.Data.AsSpan()).Slice(0, this.length);
@ -74,6 +75,12 @@ namespace SixLabors.ImageSharp.Memory
} }
protected override object GetPinnableObject() => this.Data; protected override object GetPinnableObject() => this.Data;
[MethodImpl(InliningOptions.ColdPath)]
private static void ThrowObjectDisposedException()
{
throw new ObjectDisposedException("ArrayPoolMemoryAllocator.Buffer<T>");
}
} }
/// <summary> /// <summary>

8
src/ImageSharp/Memory/Allocators/ArrayPoolMemoryAllocator.cs

@ -133,8 +133,7 @@ namespace SixLabors.ImageSharp.Memory
int bufferSizeInBytes = length * itemSizeBytes; int bufferSizeInBytes = length * itemSizeBytes;
if (bufferSizeInBytes < 0 || bufferSizeInBytes > this.BufferCapacityInBytes) if (bufferSizeInBytes < 0 || bufferSizeInBytes > this.BufferCapacityInBytes)
{ {
throw new InvalidMemoryOperationException( ThrowInvalidAllocationException<T>(length);
$"Requested allocation: {length} elements of {typeof(T).Name} is over the capacity of the MemoryAllocator.");
} }
ArrayPool<byte> pool = this.GetArrayPool(bufferSizeInBytes); ArrayPool<byte> pool = this.GetArrayPool(bufferSizeInBytes);
@ -171,6 +170,11 @@ namespace SixLabors.ImageSharp.Memory
return maxPoolSizeInBytes / 4; return maxPoolSizeInBytes / 4;
} }
[MethodImpl(InliningOptions.ColdPath)]
private static void ThrowInvalidAllocationException<T>(int length) =>
throw new InvalidMemoryOperationException(
$"Requested allocation: {length} elements of {typeof(T).Name} is over the capacity of the MemoryAllocator.");
private ArrayPool<byte> GetArrayPool(int bufferSizeInBytes) private ArrayPool<byte> GetArrayPool(int bufferSizeInBytes)
{ {
return bufferSizeInBytes <= this.PoolSelectorThresholdInBytes ? this.normalArrayPool : this.largeArrayPool; return bufferSizeInBytes <= this.PoolSelectorThresholdInBytes ? this.normalArrayPool : this.largeArrayPool;

46
src/ImageSharp/Memory/Buffer2DExtensions.cs

@ -27,52 +27,6 @@ namespace SixLabors.ImageSharp.Memory
return buffer.FastMemoryGroup.View; return buffer.FastMemoryGroup.View;
} }
/// <summary>
/// Gets a <see cref="Span{T}"/> to the backing data of <paramref name="buffer"/>
/// if the backing group consists of one single contiguous memory buffer.
/// Throws <see cref="InvalidOperationException"/> otherwise.
/// </summary>
/// <param name="buffer">The <see cref="Buffer2D{T}"/>.</param>
/// <typeparam name="T">The value type.</typeparam>
/// <returns>The <see cref="Span{T}"/> referencing the memory area.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the backing group is discontiguous.
/// </exception>
internal static Span<T> GetSingleSpan<T>(this Buffer2D<T> buffer)
where T : struct
{
Guard.NotNull(buffer, nameof(buffer));
if (buffer.FastMemoryGroup.Count > 1)
{
throw new InvalidOperationException("GetSingleSpan is only valid for a single-buffer group!");
}
return buffer.FastMemoryGroup.Single().Span;
}
/// <summary>
/// Gets a <see cref="Memory{T}"/> to the backing data of <paramref name="buffer"/>
/// if the backing group consists of one single contiguous memory buffer.
/// Throws <see cref="InvalidOperationException"/> otherwise.
/// </summary>
/// <param name="buffer">The <see cref="Buffer2D{T}"/>.</param>
/// <typeparam name="T">The value type.</typeparam>
/// <returns>The <see cref="Memory{T}"/>.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the backing group is discontiguous.
/// </exception>
internal static Memory<T> GetSingleMemory<T>(this Buffer2D<T> buffer)
where T : struct
{
Guard.NotNull(buffer, nameof(buffer));
if (buffer.FastMemoryGroup.Count > 1)
{
throw new InvalidOperationException("GetSingleMemory is only valid for a single-buffer group!");
}
return buffer.FastMemoryGroup.Single();
}
/// <summary> /// <summary>
/// TODO: Does not work with multi-buffer groups, should be specific to Resize. /// TODO: Does not work with multi-buffer groups, should be specific to Resize.
/// Copy <paramref name="columnCount"/> columns of <paramref name="buffer"/> inplace, /// Copy <paramref name="columnCount"/> columns of <paramref name="buffer"/> inplace,

37
src/ImageSharp/Memory/Buffer2D{T}.cs

@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System; using System;
using System.Linq;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
@ -158,6 +159,36 @@ namespace SixLabors.ImageSharp.Memory
return this.FastMemoryGroup.View.GetBoundedSlice(y * this.Width, this.Width); return this.FastMemoryGroup.View.GetBoundedSlice(y * this.Width, this.Width);
} }
/// <summary>
/// Gets a <see cref="Span{T}"/> to the backing data if the backing group consists of a single contiguous memory buffer.
/// Throws <see cref="InvalidOperationException"/> otherwise.
/// </summary>
/// <returns>The <see cref="Span{T}"/> referencing the memory area.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the backing group is discontiguous.
/// </exception>
[MethodImpl(InliningOptions.ShortMethod)]
internal Span<T> GetSingleSpan()
{
// TODO: If we need a public version of this method, we need to cache the non-fast Memory<T> of this.MemoryGroup
return this.cachedMemory.Length != 0 ? this.cachedMemory.Span : this.GetSingleSpanSlow();
}
/// <summary>
/// Gets a <see cref="Memory{T}"/> to the backing data of if the backing group consists of a single contiguous memory buffer.
/// Throws <see cref="InvalidOperationException"/> otherwise.
/// </summary>
/// <returns>The <see cref="Memory{T}"/>.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the backing group is discontiguous.
/// </exception>
[MethodImpl(InliningOptions.ShortMethod)]
internal Memory<T> GetSingleMemory()
{
// TODO: If we need a public version of this method, we need to cache the non-fast Memory<T> of this.MemoryGroup
return this.cachedMemory.Length != 0 ? this.cachedMemory : this.GetSingleMemorySlow();
}
/// <summary> /// <summary>
/// Swaps the contents of 'destination' with 'source' if the buffers are owned (1), /// Swaps the contents of 'destination' with 'source' if the buffers are owned (1),
/// copies the contents of 'source' to 'destination' otherwise (2). Buffers should be of same size in case 2! /// copies the contents of 'source' to 'destination' otherwise (2). Buffers should be of same size in case 2!
@ -171,6 +202,12 @@ namespace SixLabors.ImageSharp.Memory
[MethodImpl(InliningOptions.ColdPath)] [MethodImpl(InliningOptions.ColdPath)]
private Memory<T> GetRowMemorySlow(int y) => this.FastMemoryGroup.GetBoundedSlice(y * this.Width, this.Width); private Memory<T> GetRowMemorySlow(int y) => this.FastMemoryGroup.GetBoundedSlice(y * this.Width, this.Width);
[MethodImpl(InliningOptions.ColdPath)]
private Memory<T> GetSingleMemorySlow() => this.FastMemoryGroup.Single();
[MethodImpl(InliningOptions.ColdPath)]
private Span<T> GetSingleSpanSlow() => this.FastMemoryGroup.Single().Span;
[MethodImpl(InliningOptions.ColdPath)] [MethodImpl(InliningOptions.ColdPath)]
private ref T GetElementSlow(int x, int y) private ref T GetElementSlow(int x, int y)
{ {

18
src/ImageSharp/Memory/BufferArea{T}.cs

@ -79,8 +79,12 @@ namespace SixLabors.ImageSharp.Memory
/// </summary> /// </summary>
/// <returns>The reference to the [0,0] element</returns> /// <returns>The reference to the [0,0] element</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public ref T GetReferenceToOrigin() => public ref T GetReferenceToOrigin()
ref this.GetRowSpan(0)[0]; {
int y = this.Rectangle.Y;
int x = this.Rectangle.X;
return ref this.DestinationBuffer.GetRowSpan(y)[x];
}
/// <summary> /// <summary>
/// Gets a span to row 'y' inside this area. /// Gets a span to row 'y' inside this area.
@ -90,11 +94,11 @@ namespace SixLabors.ImageSharp.Memory
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public Span<T> GetRowSpan(int y) public Span<T> GetRowSpan(int y)
{ {
int yy = this.GetRowIndex(y); int yy = this.Rectangle.Y + y;
int xx = this.Rectangle.X; int xx = this.Rectangle.X;
int width = this.Rectangle.Width; int width = this.Rectangle.Width;
return this.DestinationBuffer.FastMemoryGroup.GetBoundedSlice(yy + xx, width).Span; return this.DestinationBuffer.GetRowSpan(yy).Slice(xx, width);
} }
/// <summary> /// <summary>
@ -129,12 +133,6 @@ namespace SixLabors.ImageSharp.Memory
return new BufferArea<T>(this.DestinationBuffer, rectangle); return new BufferArea<T>(this.DestinationBuffer, rectangle);
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal int GetRowIndex(int y)
{
return (y + this.Rectangle.Y) * this.DestinationBuffer.Width;
}
public void Clear() public void Clear()
{ {
// Optimization for when the size of the area is the same as the buffer size. // Optimization for when the size of the area is the same as the buffer size.

4
src/ImageSharp/PixelFormats/PixelImplementations/Rgba32.PixelOperations.cs

@ -29,7 +29,7 @@ namespace SixLabors.ImageSharp.PixelFormats
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationVectors, nameof(destinationVectors)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationVectors, nameof(destinationVectors));
destinationVectors = destinationVectors.Slice(0, sourcePixels.Length); destinationVectors = destinationVectors.Slice(0, sourcePixels.Length);
SimdUtils.BulkConvertByteToNormalizedFloat( SimdUtils.ByteToNormalizedFloat(
MemoryMarshal.Cast<Rgba32, byte>(sourcePixels), MemoryMarshal.Cast<Rgba32, byte>(sourcePixels),
MemoryMarshal.Cast<Vector4, float>(destinationVectors)); MemoryMarshal.Cast<Vector4, float>(destinationVectors));
Vector4Converters.ApplyForwardConversionModifiers(destinationVectors, modifiers); Vector4Converters.ApplyForwardConversionModifiers(destinationVectors, modifiers);
@ -46,7 +46,7 @@ namespace SixLabors.ImageSharp.PixelFormats
destinationPixels = destinationPixels.Slice(0, sourceVectors.Length); destinationPixels = destinationPixels.Slice(0, sourceVectors.Length);
Vector4Converters.ApplyBackwardConversionModifiers(sourceVectors, modifiers); Vector4Converters.ApplyBackwardConversionModifiers(sourceVectors, modifiers);
SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows( SimdUtils.NormalizedFloatToByteSaturate(
MemoryMarshal.Cast<Vector4, float>(sourceVectors), MemoryMarshal.Cast<Vector4, float>(sourceVectors),
MemoryMarshal.Cast<Rgba32, byte>(destinationPixels)); MemoryMarshal.Cast<Rgba32, byte>(destinationPixels));
} }

6
src/ImageSharp/PixelFormats/Utils/Vector4Converters.RgbaCompatible.cs

@ -62,7 +62,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
// 'destVectors' and 'lastQuarterOfDestBuffer' are overlapping buffers, // 'destVectors' and 'lastQuarterOfDestBuffer' are overlapping buffers,
// but we are always reading/writing at different positions: // but we are always reading/writing at different positions:
SimdUtils.BulkConvertByteToNormalizedFloat( SimdUtils.ByteToNormalizedFloat(
MemoryMarshal.Cast<Rgba32, byte>(lastQuarterOfDestBuffer), MemoryMarshal.Cast<Rgba32, byte>(lastQuarterOfDestBuffer),
MemoryMarshal.Cast<Vector4, float>(destVectors.Slice(0, countWithoutLastItem))); MemoryMarshal.Cast<Vector4, float>(destVectors.Slice(0, countWithoutLastItem)));
@ -107,7 +107,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
{ {
Span<Rgba32> tempSpan = tempBuffer.Memory.Span; Span<Rgba32> tempSpan = tempBuffer.Memory.Span;
SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows( SimdUtils.NormalizedFloatToByteSaturate(
MemoryMarshal.Cast<Vector4, float>(sourceVectors), MemoryMarshal.Cast<Vector4, float>(sourceVectors),
MemoryMarshal.Cast<Rgba32, byte>(tempSpan)); MemoryMarshal.Cast<Rgba32, byte>(tempSpan));
@ -122,7 +122,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
return int.MaxValue; return int.MaxValue;
} }
return SimdUtils.ExtendedIntrinsics.IsAvailable && SimdUtils.IsAvx2CompatibleArchitecture ? 256 : 128; return SimdUtils.ExtendedIntrinsics.IsAvailable && SimdUtils.HasVector8 ? 256 : 128;
} }
} }
} }

18
src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs

@ -3,6 +3,7 @@
using System; using System;
using System.Buffers; using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Memory;
@ -249,12 +250,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Transforms
private unsafe ResizeKernel CreateKernel(int dataRowIndex, int left, int right) private unsafe ResizeKernel CreateKernel(int dataRowIndex, int left, int right)
{ {
int length = right - left + 1; int length = right - left + 1;
this.ValidateSizesForCreateKernel(length, dataRowIndex, left, right);
if (length > this.data.Width)
{
throw new InvalidOperationException(
$"Error in KernelMap.CreateKernel({dataRowIndex},{left},{right}): left > this.data.Width");
}
Span<float> rowSpan = this.data.GetRowSpan(dataRowIndex); Span<float> rowSpan = this.data.GetRowSpan(dataRowIndex);
@ -262,5 +258,15 @@ namespace SixLabors.ImageSharp.Processing.Processors.Transforms
float* rowPtr = (float*)Unsafe.AsPointer(ref rowReference); float* rowPtr = (float*)Unsafe.AsPointer(ref rowReference);
return new ResizeKernel(left, rowPtr, length); return new ResizeKernel(left, rowPtr, length);
} }
[Conditional("DEBUG")]
private void ValidateSizesForCreateKernel(int length, int dataRowIndex, int left, int right)
{
if (length > this.data.Width)
{
throw new InvalidOperationException(
$"Error in KernelMap.CreateKernel({dataRowIndex},{left},{right}): left > this.data.Width");
}
}
} }
} }

303
tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_CopyTo1x1.cs

@ -4,41 +4,66 @@
using System; using System;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Formats.Jpeg.Components;
using SixLabors.ImageSharp.Memory;
// ReSharper disable InconsistentNaming // ReSharper disable InconsistentNaming
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
{ {
public class Block8x8F_CopyTo1x1 public unsafe class Block8x8F_CopyTo1x1
{ {
private Block8x8F block; private Block8x8F block;
private readonly Block8x8F[] blockArray = new Block8x8F[1];
private Buffer2D<float> buffer; private static readonly int Width = 100;
private BufferArea<float> destArea; private float[] buffer = new float[Width * 500];
private readonly float[] unpinnedBuffer = new float[Width * 500];
private GCHandle bufferHandle;
private GCHandle blockHandle;
private float* bufferPtr;
private float* blockPtr;
[GlobalSetup] [GlobalSetup]
public void Setup() public void Setup()
{ {
if (!SimdUtils.IsAvx2CompatibleArchitecture) if (!SimdUtils.HasVector8)
{ {
throw new InvalidOperationException("Benchmark Block8x8F_CopyTo1x1 is invalid on platforms without AVX2 support."); throw new InvalidOperationException("Benchmark Block8x8F_CopyTo1x1 is invalid on platforms without AVX2 support.");
} }
this.buffer = Configuration.Default.MemoryAllocator.Allocate2D<float>(1000, 500); this.bufferHandle = GCHandle.Alloc(this.buffer, GCHandleType.Pinned);
this.destArea = this.buffer.GetArea(200, 100, 64, 64); this.bufferPtr = (float*)this.bufferHandle.AddrOfPinnedObject();
// Pin self so we can take address of to the block:
this.blockHandle = GCHandle.Alloc(this.blockArray, GCHandleType.Pinned);
this.blockPtr = (float*)Unsafe.AsPointer(ref this.block);
}
[GlobalCleanup]
public void Cleanup()
{
this.bufferPtr = null;
this.blockPtr = null;
this.bufferHandle.Free();
this.blockHandle.Free();
this.buffer = null;
} }
[Benchmark(Baseline = true)] [Benchmark(Baseline = true)]
public void Original() public void Original()
{ {
ref byte selfBase = ref Unsafe.As<Block8x8F, byte>(ref this.block); ref byte selfBase = ref Unsafe.As<Block8x8F, byte>(ref this.block);
ref byte destBase = ref Unsafe.As<float, byte>(ref this.destArea.GetReferenceToOrigin()); ref byte destBase = ref Unsafe.AsRef<byte>(this.bufferPtr);
int destStride = this.destArea.Stride * sizeof(float); int destStride = Width * sizeof(float);
CopyRowImpl(ref selfBase, ref destBase, destStride, 0); CopyRowImpl(ref selfBase, ref destBase, destStride, 0);
CopyRowImpl(ref selfBase, ref destBase, destStride, 1); CopyRowImpl(ref selfBase, ref destBase, destStride, 1);
@ -58,12 +83,12 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
Unsafe.CopyBlock(ref d, ref s, 8 * sizeof(float)); Unsafe.CopyBlock(ref d, ref s, 8 * sizeof(float));
} }
[Benchmark] // [Benchmark]
public void UseVector8() public void UseVector8()
{ {
ref Block8x8F s = ref this.block; ref Block8x8F s = ref this.block;
ref float origin = ref this.destArea.GetReferenceToOrigin(); ref float origin = ref Unsafe.AsRef<float>(this.bufferPtr);
int stride = this.destArea.Stride; int stride = Width;
ref Vector<float> d0 = ref Unsafe.As<float, Vector<float>>(ref origin); ref Vector<float> d0 = ref Unsafe.As<float, Vector<float>>(ref origin);
ref Vector<float> d1 = ref Unsafe.As<float, Vector<float>>(ref Unsafe.Add(ref origin, stride)); ref Vector<float> d1 = ref Unsafe.As<float, Vector<float>>(ref Unsafe.Add(ref origin, stride));
@ -93,12 +118,12 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
d7 = row7; d7 = row7;
} }
[Benchmark] // [Benchmark]
public void UseVector8_V2() public void UseVector8_V2()
{ {
ref Block8x8F s = ref this.block; ref Block8x8F s = ref this.block;
ref float origin = ref this.destArea.GetReferenceToOrigin(); ref float origin = ref Unsafe.AsRef<float>(this.bufferPtr);
int stride = this.destArea.Stride; int stride = Width;
ref Vector<float> d0 = ref Unsafe.As<float, Vector<float>>(ref origin); ref Vector<float> d0 = ref Unsafe.As<float, Vector<float>>(ref origin);
ref Vector<float> d1 = ref Unsafe.As<float, Vector<float>>(ref Unsafe.Add(ref origin, stride)); ref Vector<float> d1 = ref Unsafe.As<float, Vector<float>>(ref Unsafe.Add(ref origin, stride));
@ -119,15 +144,247 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
d7 = Unsafe.As<Vector4, Vector<float>>(ref s.V7L); d7 = Unsafe.As<Vector4, Vector<float>>(ref s.V7L);
} }
// RESULTS: [Benchmark]
public void UseVector8_V3()
{
int stride = Width * sizeof(float);
ref float d = ref this.unpinnedBuffer[0];
ref Vector<float> s = ref Unsafe.As<Block8x8F, Vector<float>>(ref this.block);
Vector<float> v0 = s;
Vector<float> v1 = Unsafe.AddByteOffset(ref s, (IntPtr)1);
Vector<float> v2 = Unsafe.AddByteOffset(ref s, (IntPtr)2);
Vector<float> v3 = Unsafe.AddByteOffset(ref s, (IntPtr)3);
Unsafe.As<float, Vector<float>>(ref d) = v0;
Unsafe.As<float, Vector<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)stride)) = v1;
Unsafe.As<float, Vector<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)(stride * 2))) = v2;
Unsafe.As<float, Vector<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)(stride * 3))) = v3;
v0 = Unsafe.AddByteOffset(ref s, (IntPtr)4);
v1 = Unsafe.AddByteOffset(ref s, (IntPtr)5);
v2 = Unsafe.AddByteOffset(ref s, (IntPtr)6);
v3 = Unsafe.AddByteOffset(ref s, (IntPtr)7);
Unsafe.As<float, Vector<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)(stride * 4))) = v0;
Unsafe.As<float, Vector<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)(stride * 5))) = v1;
Unsafe.As<float, Vector<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)(stride * 6))) = v2;
Unsafe.As<float, Vector<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)(stride * 7))) = v3;
}
#if SUPPORTS_RUNTIME_INTRINSICS
[Benchmark]
public void UseVector256_Avx2_Variant1()
{
int stride = Width;
float* d = this.bufferPtr;
float* s = this.blockPtr;
Vector256<float> v;
v = Avx.LoadVector256(s);
Avx.Store(d, v);
v = Avx.LoadVector256(s + 8);
Avx.Store(d + stride, v);
v = Avx.LoadVector256(s + (8 * 2));
Avx.Store(d + (stride * 2), v);
v = Avx.LoadVector256(s + (8 * 3));
Avx.Store(d + (stride * 3), v);
v = Avx.LoadVector256(s + (8 * 4));
Avx.Store(d + (stride * 4), v);
v = Avx.LoadVector256(s + (8 * 5));
Avx.Store(d + (stride * 5), v);
v = Avx.LoadVector256(s + (8 * 6));
Avx.Store(d + (stride * 6), v);
v = Avx.LoadVector256(s + (8 * 7));
Avx.Store(d + (stride * 7), v);
}
[Benchmark]
public void UseVector256_Avx2_Variant2()
{
int stride = Width;
float* d = this.bufferPtr;
float* s = this.blockPtr;
Vector256<float> v0 = Avx.LoadVector256(s);
Vector256<float> v1 = Avx.LoadVector256(s + 8);
Vector256<float> v2 = Avx.LoadVector256(s + (8 * 2));
Vector256<float> v3 = Avx.LoadVector256(s + (8 * 3));
Vector256<float> v4 = Avx.LoadVector256(s + (8 * 4));
Vector256<float> v5 = Avx.LoadVector256(s + (8 * 5));
Vector256<float> v6 = Avx.LoadVector256(s + (8 * 6));
Vector256<float> v7 = Avx.LoadVector256(s + (8 * 7));
Avx.Store(d, v0);
Avx.Store(d + stride, v1);
Avx.Store(d + (stride * 2), v2);
Avx.Store(d + (stride * 3), v3);
Avx.Store(d + (stride * 4), v4);
Avx.Store(d + (stride * 5), v5);
Avx.Store(d + (stride * 6), v6);
Avx.Store(d + (stride * 7), v7);
}
[Benchmark]
public void UseVector256_Avx2_Variant3()
{
int stride = Width;
float* d = this.bufferPtr;
float* s = this.blockPtr;
Vector256<float> v0 = Avx.LoadVector256(s);
Vector256<float> v1 = Avx.LoadVector256(s + 8);
Vector256<float> v2 = Avx.LoadVector256(s + (8 * 2));
Vector256<float> v3 = Avx.LoadVector256(s + (8 * 3));
Avx.Store(d, v0);
Avx.Store(d + stride, v1);
Avx.Store(d + (stride * 2), v2);
Avx.Store(d + (stride * 3), v3);
v0 = Avx.LoadVector256(s + (8 * 4));
v1 = Avx.LoadVector256(s + (8 * 5));
v2 = Avx.LoadVector256(s + (8 * 6));
v3 = Avx.LoadVector256(s + (8 * 7));
Avx.Store(d + (stride * 4), v0);
Avx.Store(d + (stride * 5), v1);
Avx.Store(d + (stride * 6), v2);
Avx.Store(d + (stride * 7), v3);
}
[Benchmark]
public void UseVector256_Avx2_Variant3_RefCast()
{
int stride = Width;
ref float d = ref this.unpinnedBuffer[0];
ref Vector256<float> s = ref Unsafe.As<Block8x8F, Vector256<float>>(ref this.block);
Vector256<float> v0 = s;
Vector256<float> v1 = Unsafe.Add(ref s, 1);
Vector256<float> v2 = Unsafe.Add(ref s, 2);
Vector256<float> v3 = Unsafe.Add(ref s, 3);
Unsafe.As<float, Vector256<float>>(ref d) = v0;
Unsafe.As<float, Vector256<float>>(ref Unsafe.Add(ref d, stride)) = v1;
Unsafe.As<float, Vector256<float>>(ref Unsafe.Add(ref d, stride * 2)) = v2;
Unsafe.As<float, Vector256<float>>(ref Unsafe.Add(ref d, stride * 3)) = v3;
v0 = Unsafe.Add(ref s, 4);
v1 = Unsafe.Add(ref s, 5);
v2 = Unsafe.Add(ref s, 6);
v3 = Unsafe.Add(ref s, 7);
Unsafe.As<float, Vector256<float>>(ref Unsafe.Add(ref d, stride * 4)) = v0;
Unsafe.As<float, Vector256<float>>(ref Unsafe.Add(ref d, stride * 5)) = v1;
Unsafe.As<float, Vector256<float>>(ref Unsafe.Add(ref d, stride * 6)) = v2;
Unsafe.As<float, Vector256<float>>(ref Unsafe.Add(ref d, stride * 7)) = v3;
}
[Benchmark]
public void UseVector256_Avx2_Variant3_RefCast_Mod()
{
int stride = Width * sizeof(float);
ref float d = ref this.unpinnedBuffer[0];
ref Vector256<float> s = ref Unsafe.As<Block8x8F, Vector256<float>>(ref this.block);
Vector256<float> v0 = s;
Vector256<float> v1 = Unsafe.AddByteOffset(ref s, (IntPtr)1);
Vector256<float> v2 = Unsafe.AddByteOffset(ref s, (IntPtr)2);
Vector256<float> v3 = Unsafe.AddByteOffset(ref s, (IntPtr)3);
Unsafe.As<float, Vector256<float>>(ref d) = v0;
Unsafe.As<float, Vector256<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)stride)) = v1;
Unsafe.As<float, Vector256<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)(stride * 2))) = v2;
Unsafe.As<float, Vector256<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)(stride * 3))) = v3;
v0 = Unsafe.AddByteOffset(ref s, (IntPtr)4);
v1 = Unsafe.AddByteOffset(ref s, (IntPtr)5);
v2 = Unsafe.AddByteOffset(ref s, (IntPtr)6);
v3 = Unsafe.AddByteOffset(ref s, (IntPtr)7);
Unsafe.As<float, Vector256<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)(stride * 4))) = v0;
Unsafe.As<float, Vector256<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)(stride * 5))) = v1;
Unsafe.As<float, Vector256<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)(stride * 6))) = v2;
Unsafe.As<float, Vector256<float>>(ref Unsafe.AddByteOffset(ref d, (IntPtr)(stride * 7))) = v3;
}
// [Benchmark]
public void UseVector256_Avx2_Variant3_WithLocalPinning()
{
int stride = Width;
fixed (float* d = this.unpinnedBuffer)
fixed (Block8x8F* ss = &this.block)
{
var s = (float*)ss;
Vector256<float> v0 = Avx.LoadVector256(s);
Vector256<float> v1 = Avx.LoadVector256(s + 8);
Vector256<float> v2 = Avx.LoadVector256(s + (8 * 2));
Vector256<float> v3 = Avx.LoadVector256(s + (8 * 3));
Avx.Store(d, v0);
Avx.Store(d + stride, v1);
Avx.Store(d + (stride * 2), v2);
Avx.Store(d + (stride * 3), v3);
v0 = Avx.LoadVector256(s + (8 * 4));
v1 = Avx.LoadVector256(s + (8 * 5));
v2 = Avx.LoadVector256(s + (8 * 6));
v3 = Avx.LoadVector256(s + (8 * 7));
Avx.Store(d + (stride * 4), v0);
Avx.Store(d + (stride * 5), v1);
Avx.Store(d + (stride * 6), v2);
Avx.Store(d + (stride * 7), v3);
}
}
// [Benchmark]
public void UseVector256_Avx2_Variant3_sbyte()
{
int stride = Width * 4;
var d = (sbyte*)this.bufferPtr;
var s = (sbyte*)this.blockPtr;
Vector256<sbyte> v0 = Avx.LoadVector256(s);
Vector256<sbyte> v1 = Avx.LoadVector256(s + 32);
Vector256<sbyte> v2 = Avx.LoadVector256(s + (32 * 2));
Vector256<sbyte> v3 = Avx.LoadVector256(s + (32 * 3));
Avx.Store(d, v0);
Avx.Store(d + stride, v1);
Avx.Store(d + (stride * 2), v2);
Avx.Store(d + (stride * 3), v3);
v0 = Avx.LoadVector256(s + (32 * 4));
v1 = Avx.LoadVector256(s + (32 * 5));
v2 = Avx.LoadVector256(s + (32 * 6));
v3 = Avx.LoadVector256(s + (32 * 7));
Avx.Store(d + (stride * 4), v0);
Avx.Store(d + (stride * 5), v1);
Avx.Store(d + (stride * 6), v2);
Avx.Store(d + (stride * 7), v3);
}
#endif
// *** RESULTS 02/2020 ***
// BenchmarkDotNet=v0.12.0, OS=Windows 10.0.18363
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.200-preview-014971
// [Host] : .NET Core 3.1.2 (CoreCLR 4.700.20.6602, CoreFX 4.700.20.6702), X64 RyuJIT
// DefaultJob : .NET Core 3.1.2 (CoreCLR 4.700.20.6602, CoreFX 4.700.20.6702), X64 RyuJIT
// //
// Method | Mean | Error | StdDev | Scaled |
// -------------- |---------:|----------:|----------:|-------:|
// Original | 22.53 ns | 0.1660 ns | 0.1553 ns | 1.00 |
// UseVector8 | 21.59 ns | 0.3079 ns | 0.2571 ns | 0.96 |
// UseVector8_V2 | 22.57 ns | 0.1699 ns | 0.1506 ns | 1.00 |
// //
// Conclusion: // | Method | Mean | Error | StdDev | Ratio | RatioSD |
// Doesn't worth to bother with this // |--------------------------------------- |---------:|----------:|----------:|------:|--------:|
// | Original | 4.012 ns | 0.0567 ns | 0.0531 ns | 1.00 | 0.00 |
// | UseVector8_V3 | 4.013 ns | 0.0947 ns | 0.0840 ns | 1.00 | 0.03 |
// | UseVector256_Avx2_Variant1 | 2.546 ns | 0.0376 ns | 0.0314 ns | 0.63 | 0.01 |
// | UseVector256_Avx2_Variant2 | 2.643 ns | 0.0162 ns | 0.0151 ns | 0.66 | 0.01 |
// | UseVector256_Avx2_Variant3 | 2.520 ns | 0.0760 ns | 0.0813 ns | 0.63 | 0.02 |
// | UseVector256_Avx2_Variant3_RefCast | 2.300 ns | 0.0877 ns | 0.0938 ns | 0.58 | 0.03 |
// | UseVector256_Avx2_Variant3_RefCast_Mod | 2.139 ns | 0.0698 ns | 0.0686 ns | 0.53 | 0.02 |
} }
} }

442
tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Round.cs

@ -4,6 +4,12 @@
using System; using System;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Attributes;
@ -12,10 +18,14 @@ using SixLabors.ImageSharp.Formats.Jpeg.Components;
// ReSharper disable InconsistentNaming // ReSharper disable InconsistentNaming
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
{ {
public class Block8x8F_Round public unsafe class Block8x8F_Round
{ {
private Block8x8F block; private Block8x8F block;
private readonly byte[] blockBuffer = new byte[512];
private GCHandle blockHandle;
private float* alignedPtr;
[GlobalSetup] [GlobalSetup]
public void Setup() public void Setup()
{ {
@ -24,13 +34,27 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
throw new NotSupportedException("Vector<float>.Count != 8"); throw new NotSupportedException("Vector<float>.Count != 8");
} }
for (int i = 0; i < Block8x8F.Size; i++) this.blockHandle = GCHandle.Alloc(this.blockBuffer, GCHandleType.Pinned);
ulong ptr = (ulong)this.blockHandle.AddrOfPinnedObject();
ptr += 16;
ptr -= ptr % 16;
if (ptr % 16 != 0)
{ {
this.block[i] = i * 44.8f; throw new Exception("ptr is unaligned");
} }
this.alignedPtr = (float*)ptr;
} }
[Benchmark(Baseline = true)] [GlobalCleanup]
public void Cleanup()
{
this.blockHandle.Free();
this.alignedPtr = null;
}
[Benchmark]
public void ScalarRound() public void ScalarRound()
{ {
ref float b = ref Unsafe.As<Block8x8F, float>(ref this.block); ref float b = ref Unsafe.As<Block8x8F, float>(ref this.block);
@ -42,8 +66,8 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
} }
} }
[Benchmark] [Benchmark(Baseline = true)]
public void SimdRound() public void SimdUtils_FastRound_Vector8()
{ {
ref Block8x8F b = ref this.block; ref Block8x8F b = ref this.block;
@ -64,5 +88,411 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
ref Vector<float> row7 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V7L); ref Vector<float> row7 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V7L);
row7 = SimdUtils.FastRound(row7); row7 = SimdUtils.FastRound(row7);
} }
[Benchmark]
public void SimdUtils_FastRound_Vector8_ForceAligned()
{
ref Block8x8F b = ref Unsafe.AsRef<Block8x8F>(this.alignedPtr);
ref Vector<float> row0 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V0L);
row0 = SimdUtils.FastRound(row0);
ref Vector<float> row1 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V1L);
row1 = SimdUtils.FastRound(row1);
ref Vector<float> row2 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V2L);
row2 = SimdUtils.FastRound(row2);
ref Vector<float> row3 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V3L);
row3 = SimdUtils.FastRound(row3);
ref Vector<float> row4 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V4L);
row4 = SimdUtils.FastRound(row4);
ref Vector<float> row5 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V5L);
row5 = SimdUtils.FastRound(row5);
ref Vector<float> row6 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V6L);
row6 = SimdUtils.FastRound(row6);
ref Vector<float> row7 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V7L);
row7 = SimdUtils.FastRound(row7);
}
[Benchmark]
public void SimdUtils_FastRound_Vector8_Grouped()
{
ref Block8x8F b = ref this.block;
ref Vector<float> row0 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V0L);
ref Vector<float> row1 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V1L);
ref Vector<float> row2 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V2L);
ref Vector<float> row3 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V3L);
row0 = SimdUtils.FastRound(row0);
row1 = SimdUtils.FastRound(row1);
row2 = SimdUtils.FastRound(row2);
row3 = SimdUtils.FastRound(row3);
row0 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V4L);
row1 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V5L);
row2 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V6L);
row3 = ref Unsafe.As<Vector4, Vector<float>>(ref b.V7L);
row0 = SimdUtils.FastRound(row0);
row1 = SimdUtils.FastRound(row1);
row2 = SimdUtils.FastRound(row2);
row3 = SimdUtils.FastRound(row3);
}
#if SUPPORTS_RUNTIME_INTRINSICS
[Benchmark]
public void Sse41_V1()
{
ref Vector128<float> b0 = ref Unsafe.As<Block8x8F, Vector128<float>>(ref this.block);
ref Vector128<float> p = ref b0;
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 1);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 2);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 3);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 4);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 5);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 6);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 7);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 8);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 9);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 10);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 11);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 12);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 13);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 14);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.Add(ref b0, 15);
p = Sse41.RoundToNearestInteger(p);
}
[Benchmark]
public unsafe void Sse41_V2()
{
ref Vector128<float> p = ref Unsafe.As<Block8x8F, Vector128<float>>(ref this.block);
p = Sse41.RoundToNearestInteger(p);
var offset = (IntPtr)sizeof(Vector128<float>);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
}
[Benchmark]
public unsafe void Sse41_V3()
{
ref Vector128<float> p = ref Unsafe.As<Block8x8F, Vector128<float>>(ref this.block);
p = Sse41.RoundToNearestInteger(p);
var offset = (IntPtr)sizeof(Vector128<float>);
for (int i = 0; i < 15; i++)
{
p = ref Unsafe.AddByteOffset(ref p, offset);
p = Sse41.RoundToNearestInteger(p);
}
}
[Benchmark]
public unsafe void Sse41_V4()
{
ref Vector128<float> p = ref Unsafe.As<Block8x8F, Vector128<float>>(ref this.block);
var offset = (IntPtr)sizeof(Vector128<float>);
ref Vector128<float> a = ref p;
ref Vector128<float> b = ref Unsafe.AddByteOffset(ref a, offset);
ref Vector128<float> c = ref Unsafe.AddByteOffset(ref b, offset);
ref Vector128<float> d = ref Unsafe.AddByteOffset(ref c, offset);
a = Sse41.RoundToNearestInteger(a);
b = Sse41.RoundToNearestInteger(b);
c = Sse41.RoundToNearestInteger(c);
d = Sse41.RoundToNearestInteger(d);
a = ref Unsafe.AddByteOffset(ref d, offset);
b = ref Unsafe.AddByteOffset(ref a, offset);
c = ref Unsafe.AddByteOffset(ref b, offset);
d = ref Unsafe.AddByteOffset(ref c, offset);
a = Sse41.RoundToNearestInteger(a);
b = Sse41.RoundToNearestInteger(b);
c = Sse41.RoundToNearestInteger(c);
d = Sse41.RoundToNearestInteger(d);
a = ref Unsafe.AddByteOffset(ref d, offset);
b = ref Unsafe.AddByteOffset(ref a, offset);
c = ref Unsafe.AddByteOffset(ref b, offset);
d = ref Unsafe.AddByteOffset(ref c, offset);
a = Sse41.RoundToNearestInteger(a);
b = Sse41.RoundToNearestInteger(b);
c = Sse41.RoundToNearestInteger(c);
d = Sse41.RoundToNearestInteger(d);
a = ref Unsafe.AddByteOffset(ref d, offset);
b = ref Unsafe.AddByteOffset(ref a, offset);
c = ref Unsafe.AddByteOffset(ref b, offset);
d = ref Unsafe.AddByteOffset(ref c, offset);
a = Sse41.RoundToNearestInteger(a);
b = Sse41.RoundToNearestInteger(b);
c = Sse41.RoundToNearestInteger(c);
d = Sse41.RoundToNearestInteger(d);
}
[Benchmark]
public unsafe void Sse41_V5_Unaligned()
{
float* p = this.alignedPtr + 1;
Vector128<float> v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
p += 8;
v = Sse.LoadVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.Store(p, v);
}
[Benchmark]
public unsafe void Sse41_V5_Aligned()
{
float* p = this.alignedPtr;
Vector128<float> v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
v = Sse.LoadAlignedVector128(p);
v = Sse41.RoundToNearestInteger(v);
Sse.StoreAligned(p, v);
p += 8;
}
[Benchmark]
public void Sse41_V6_Aligned()
{
float* p = this.alignedPtr;
Round8SseVectors(p);
Round8SseVectors(p + 32);
}
private static void Round8SseVectors(float* p0)
{
float* p1 = p0 + 4;
float* p2 = p1 + 4;
float* p3 = p2 + 4;
float* p4 = p3 + 4;
float* p5 = p4 + 4;
float* p6 = p5 + 4;
float* p7 = p6 + 4;
Vector128<float> v0 = Sse.LoadAlignedVector128(p0);
Vector128<float> v1 = Sse.LoadAlignedVector128(p1);
Vector128<float> v2 = Sse.LoadAlignedVector128(p2);
Vector128<float> v3 = Sse.LoadAlignedVector128(p3);
Vector128<float> v4 = Sse.LoadAlignedVector128(p4);
Vector128<float> v5 = Sse.LoadAlignedVector128(p5);
Vector128<float> v6 = Sse.LoadAlignedVector128(p6);
Vector128<float> v7 = Sse.LoadAlignedVector128(p7);
v0 = Sse41.RoundToNearestInteger(v0);
v1 = Sse41.RoundToNearestInteger(v1);
v2 = Sse41.RoundToNearestInteger(v2);
v3 = Sse41.RoundToNearestInteger(v3);
v4 = Sse41.RoundToNearestInteger(v4);
v5 = Sse41.RoundToNearestInteger(v5);
v6 = Sse41.RoundToNearestInteger(v6);
v7 = Sse41.RoundToNearestInteger(v7);
Sse.StoreAligned(p0, v0);
Sse.StoreAligned(p1, v1);
Sse.StoreAligned(p2, v2);
Sse.StoreAligned(p3, v3);
Sse.StoreAligned(p4, v4);
Sse.StoreAligned(p5, v5);
Sse.StoreAligned(p6, v6);
Sse.StoreAligned(p7, v7);
}
#endif
} }
} }

1
tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg_ImageSpecific.cs

@ -44,6 +44,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
private string TestImageFullPath => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, this.TestImage); private string TestImageFullPath => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, this.TestImage);
#pragma warning disable SA1115
[Params( [Params(
TestImages.Jpeg.BenchmarkSuite.Lake_Small444YCbCr, TestImages.Jpeg.BenchmarkSuite.Lake_Small444YCbCr,
TestImages.Jpeg.BenchmarkSuite.BadRstProgressive518_Large444YCbCr, TestImages.Jpeg.BenchmarkSuite.BadRstProgressive518_Large444YCbCr,

8
tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs

@ -36,7 +36,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
} }
} }
[Benchmark(Baseline = true)] [Benchmark]
public void Scalar() public void Scalar()
{ {
var values = new JpegColorConverter.ComponentValues(this.input, 0); var values = new JpegColorConverter.ComponentValues(this.input, 0);
@ -44,7 +44,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
JpegColorConverter.FromYCbCrBasic.ConvertCore(values, this.output, 255F, 128F); JpegColorConverter.FromYCbCrBasic.ConvertCore(values, this.output, 255F, 128F);
} }
[Benchmark] [Benchmark(Baseline = true)]
public void SimdVector4() public void SimdVector4()
{ {
var values = new JpegColorConverter.ComponentValues(this.input, 0); var values = new JpegColorConverter.ComponentValues(this.input, 0);
@ -53,11 +53,11 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
} }
[Benchmark] [Benchmark]
public void SimdAvx2() public void SimdVector8()
{ {
var values = new JpegColorConverter.ComponentValues(this.input, 0); var values = new JpegColorConverter.ComponentValues(this.input, 0);
JpegColorConverter.FromYCbCrSimdAvx2.ConvertCore(values, this.output, 255F, 128F); JpegColorConverter.FromYCbCrSimdVector8.ConvertCore(values, this.output, 255F, 128F);
} }
private static Buffer2D<float>[] CreateRandomValues( private static Buffer2D<float>[] CreateRandomValues(

126
tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs

@ -7,8 +7,14 @@ using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using BenchmarkDotNet.Attributes; #if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Environments;
using BenchmarkDotNet.Jobs;
using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.PixelFormats;
@ -25,7 +31,8 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
protected Configuration Configuration => Configuration.Default; protected Configuration Configuration => Configuration.Default;
[Params(64, 2048)] // [Params(64, 2048)]
[Params(1024)]
public int Count { get; set; } public int Count { get; set; }
[GlobalSetup] [GlobalSetup]
@ -74,52 +81,105 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan()); Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan()); Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
SimdUtils.FallbackIntrinsics128.BulkConvertNormalizedFloatToByteClampOverflows(sBytes, dFloats); SimdUtils.FallbackIntrinsics128.NormalizedFloatToByteSaturate(sBytes, dFloats);
} }
[Benchmark(Baseline = true)] [Benchmark]
public void BasicIntrinsics256() public void BasicIntrinsics256()
{ {
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan()); Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan()); Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflows(sBytes, dFloats); SimdUtils.BasicIntrinsics256.NormalizedFloatToByteSaturate(sBytes, dFloats);
} }
[Benchmark] [Benchmark(Baseline = true)]
public void ExtendedIntrinsic() public void ExtendedIntrinsic()
{ {
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan()); Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan()); Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
SimdUtils.ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflows(sBytes, dFloats); SimdUtils.ExtendedIntrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats);
}
#if SUPPORTS_RUNTIME_INTRINSICS
[Benchmark]
public void UseAvx2()
{
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
SimdUtils.Avx2Intrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats);
}
private static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 };
[Benchmark]
public void UseAvx2_Grouped()
{
Span<float> src = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
int n = dest.Length / Vector<byte>.Count;
ref Vector256<float> sourceBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(src));
ref Vector256<byte> destBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32);
Vector256<int> mask = Unsafe.As<byte, Vector256<int>>(ref maskBase);
var maxBytes = Vector256.Create(255f);
for (int i = 0; i < n; i++)
{
ref Vector256<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
Vector256<float> f0 = s;
Vector256<float> f1 = Unsafe.Add(ref s, 1);
Vector256<float> f2 = Unsafe.Add(ref s, 2);
Vector256<float> f3 = Unsafe.Add(ref s, 3);
f0 = Avx.Multiply(maxBytes, f0);
f1 = Avx.Multiply(maxBytes, f1);
f2 = Avx.Multiply(maxBytes, f2);
f3 = Avx.Multiply(maxBytes, f3);
Vector256<int> w0 = Avx.ConvertToVector256Int32(f0);
Vector256<int> w1 = Avx.ConvertToVector256Int32(f1);
Vector256<int> w2 = Avx.ConvertToVector256Int32(f2);
Vector256<int> w3 = Avx.ConvertToVector256Int32(f3);
Vector256<short> u0 = Avx2.PackSignedSaturate(w0, w1);
Vector256<short> u1 = Avx2.PackSignedSaturate(w2, w3);
Vector256<byte> b = Avx2.PackUnsignedSaturate(u0, u1);
b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte();
Unsafe.Add(ref destBase, i) = b;
}
} }
// RESULTS (2018 October): [MethodImpl(MethodImplOptions.AggressiveInlining)]
// Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated | private static Vector256<int> ConvertToInt32(Vector256<float> vf, Vector256<float> scale)
// ---------------------------- |-------- |------ |-------------:|-------------:|------------:|-------:|---------:|-------:|----------:| {
// FallbackIntrinsics128 | Clr | 64 | 340.38 ns | 22.319 ns | 1.2611 ns | 1.41 | 0.01 | - | 0 B | vf = Avx.Multiply(scale, vf);
// BasicIntrinsics256 | Clr | 64 | 240.79 ns | 11.421 ns | 0.6453 ns | 1.00 | 0.00 | - | 0 B | return Avx.ConvertToVector256Int32(vf);
// ExtendedIntrinsic | Clr | 64 | 199.09 ns | 124.239 ns | 7.0198 ns | 0.83 | 0.02 | - | 0 B | }
// PixelOperations_Base | Clr | 64 | 647.99 ns | 24.003 ns | 1.3562 ns | 2.69 | 0.01 | 0.0067 | 24 B | #endif
// PixelOperations_Specialized | Clr | 64 | 259.79 ns | 13.391 ns | 0.7566 ns | 1.08 | 0.00 | - | 0 B | <--- ceremonial overhead has been minimized!
// | | | | | | | | | | // *** RESULTS 2020 March: ***
// FallbackIntrinsics128 | Core | 64 | 234.64 ns | 12.320 ns | 0.6961 ns | 1.58 | 0.00 | - | 0 B | // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// BasicIntrinsics256 | Core | 64 | 148.87 ns | 2.794 ns | 0.1579 ns | 1.00 | 0.00 | - | 0 B | // .NET Core SDK=3.1.200-preview-014971
// ExtendedIntrinsic | Core | 64 | 94.06 ns | 10.015 ns | 0.5659 ns | 0.63 | 0.00 | - | 0 B | // Job-IUZXZT : .NET Core 3.1.2 (CoreCLR 4.700.20.6602, CoreFX 4.700.20.6702), X64 RyuJIT
// PixelOperations_Base | Core | 64 | 573.52 ns | 31.865 ns | 1.8004 ns | 3.85 | 0.01 | 0.0067 | 24 B | //
// PixelOperations_Specialized | Core | 64 | 117.21 ns | 13.264 ns | 0.7494 ns | 0.79 | 0.00 | - | 0 B | // | Method | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// | | | | | | | | | | // |---------------------------- |------ |-----------:|------------:|----------:|------:|--------:|------:|------:|------:|----------:|
// FallbackIntrinsics128 | Clr | 2048 | 6,735.93 ns | 2,139.340 ns | 120.8767 ns | 1.71 | 0.03 | - | 0 B | // | FallbackIntrinsics128 | 1024 | 2,952.6 ns | 1,680.77 ns | 92.13 ns | 3.32 | 0.16 | - | - | - | - |
// BasicIntrinsics256 | Clr | 2048 | 3,929.29 ns | 334.027 ns | 18.8731 ns | 1.00 | 0.00 | - | 0 B | // | BasicIntrinsics256 | 1024 | 1,664.5 ns | 928.11 ns | 50.87 ns | 1.87 | 0.09 | - | - | - | - |
// ExtendedIntrinsic | Clr | 2048 | 2,226.01 ns | 130.525 ns | 7.3749 ns |!! 0.57 | 0.00 | - | 0 B | <--- ExtendedIntrinsics rock! // | ExtendedIntrinsic | 1024 | 890.6 ns | 375.48 ns | 20.58 ns | 1.00 | 0.00 | - | - | - | - |
// PixelOperations_Base | Clr | 2048 | 16,760.84 ns | 367.800 ns | 20.7814 ns | 4.27 | 0.02 | - | 24 B | <--- Extra copies using "Vector4 TPixel.ToVector4()" // | UseAvx2 | 1024 | 299.0 ns | 30.47 ns | 1.67 ns | 0.34 | 0.01 | - | - | - | - |
// PixelOperations_Specialized | Clr | 2048 | 3,986.03 ns | 237.238 ns | 13.4044 ns | 1.01 | 0.00 | - | 0 B | <--- can't yet detect whether ExtendedIntrinsics are available :( // | UseAvx2_Grouped | 1024 | 318.1 ns | 48.19 ns | 2.64 ns | 0.36 | 0.01 | - | - | - | - |
// | | | | | | | | | | // | PixelOperations_Base | 1024 | 8,136.9 ns | 1,834.82 ns | 100.57 ns | 9.14 | 0.26 | - | - | - | 24 B |
// FallbackIntrinsics128 | Core | 2048 | 6,644.65 ns | 2,677.090 ns | 151.2605 ns | 1.69 | 0.05 | - | 0 B | // | PixelOperations_Specialized | 1024 | 951.1 ns | 123.93 ns | 6.79 ns | 1.07 | 0.03 | - | - | - | - |
// BasicIntrinsics256 | Core | 2048 | 3,923.70 ns | 1,971.760 ns | 111.4081 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsic | Core | 2048 | 2,092.32 ns | 375.657 ns | 21.2253 ns |!! 0.53 | 0.01 | - | 0 B | <--- ExtendedIntrinsics rock!
// PixelOperations_Base | Core | 2048 | 16,875.73 ns | 1,271.957 ns | 71.8679 ns | 4.30 | 0.10 | - | 24 B |
// PixelOperations_Specialized | Core | 2048 | 2,129.92 ns | 262.888 ns | 14.8537 ns |!! 0.54 | 0.01 | - | 0 B | <--- ExtendedIntrinsics rock!
} }
} }

6
tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs

@ -22,7 +22,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan()); Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan()); Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
SimdUtils.FallbackIntrinsics128.BulkConvertByteToNormalizedFloat(sBytes, dFloats); SimdUtils.FallbackIntrinsics128.ByteToNormalizedFloat(sBytes, dFloats);
} }
[Benchmark] [Benchmark]
@ -40,7 +40,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan()); Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan()); Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
SimdUtils.BasicIntrinsics256.BulkConvertByteToNormalizedFloat(sBytes, dFloats); SimdUtils.BasicIntrinsics256.ByteToNormalizedFloat(sBytes, dFloats);
} }
[Benchmark] [Benchmark]
@ -49,7 +49,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan()); Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan()); Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(sBytes, dFloats); SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(sBytes, dFloats);
} }
// [Benchmark] // [Benchmark]

8
tests/ImageSharp.Benchmarks/Config.cs

@ -38,6 +38,14 @@ namespace SixLabors.ImageSharp.Benchmarks
} }
} }
public class ShortCore31 : Config
{
public ShortCore31()
{
this.Add(Job.Default.With(CoreRuntime.Core31).WithLaunchCount(1).WithWarmupCount(3).WithIterationCount(3));
}
}
#if Windows_NT #if Windows_NT
private bool IsElevated private bool IsElevated
{ {

14
tests/ImageSharp.Tests.ProfilingSandbox/Program.cs

@ -7,6 +7,10 @@ using SixLabors.ImageSharp.Tests.PixelFormats.PixelOperations;
using SixLabors.ImageSharp.Tests.ProfilingBenchmarks; using SixLabors.ImageSharp.Tests.ProfilingBenchmarks;
using Xunit.Abstractions; using Xunit.Abstractions;
// in this file, comments are used for disabling stuff for local execution
#pragma warning disable SA1515
#pragma warning disable SA1512
namespace SixLabors.ImageSharp.Tests.ProfilingSandbox namespace SixLabors.ImageSharp.Tests.ProfilingSandbox
{ {
public class Program public class Program
@ -28,10 +32,9 @@ namespace SixLabors.ImageSharp.Tests.ProfilingSandbox
public static void Main(string[] args) public static void Main(string[] args)
{ {
// RunJpegColorProfilingTests(); // RunJpegColorProfilingTests();
RunDecodeJpegProfilingTests();
// RunDecodeJpegProfilingTests();
// RunToVector4ProfilingTest(); // RunToVector4ProfilingTest();
RunResizeProfilingTest(); // RunResizeProfilingTest();
Console.ReadLine(); Console.ReadLine();
} }
@ -61,8 +64,11 @@ namespace SixLabors.ImageSharp.Tests.ProfilingSandbox
foreach (object[] data in JpegProfilingBenchmarks.DecodeJpegData) foreach (object[] data in JpegProfilingBenchmarks.DecodeJpegData)
{ {
string fileName = (string)data[0]; string fileName = (string)data[0];
benchmarks.DecodeJpeg(fileName); int executionCount = (int)data[1];
benchmarks.DecodeJpeg(fileName, executionCount);
} }
Console.WriteLine("DONE.");
} }
} }
} }

38
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -105,7 +105,7 @@ namespace SixLabors.ImageSharp.Tests.Common
private bool SkipOnNonAvx2([CallerMemberName] string testCaseName = null) private bool SkipOnNonAvx2([CallerMemberName] string testCaseName = null)
{ {
if (!SimdUtils.IsAvx2CompatibleArchitecture) if (!SimdUtils.HasVector8)
{ {
this.Output.WriteLine("Skipping AVX2 specific test case: " + testCaseName); this.Output.WriteLine("Skipping AVX2 specific test case: " + testCaseName);
return true; return true;
@ -178,7 +178,7 @@ namespace SixLabors.ImageSharp.Tests.Common
{ {
TestImpl_BulkConvertByteToNormalizedFloat( TestImpl_BulkConvertByteToNormalizedFloat(
count, count,
(s, d) => SimdUtils.FallbackIntrinsics128.BulkConvertByteToNormalizedFloat(s.Span, d.Span)); (s, d) => SimdUtils.FallbackIntrinsics128.ByteToNormalizedFloat(s.Span, d.Span));
} }
[Theory] [Theory]
@ -192,7 +192,7 @@ namespace SixLabors.ImageSharp.Tests.Common
TestImpl_BulkConvertByteToNormalizedFloat( TestImpl_BulkConvertByteToNormalizedFloat(
count, count,
(s, d) => SimdUtils.BasicIntrinsics256.BulkConvertByteToNormalizedFloat(s.Span, d.Span)); (s, d) => SimdUtils.BasicIntrinsics256.ByteToNormalizedFloat(s.Span, d.Span));
} }
[Theory] [Theory]
@ -201,7 +201,7 @@ namespace SixLabors.ImageSharp.Tests.Common
{ {
TestImpl_BulkConvertByteToNormalizedFloat( TestImpl_BulkConvertByteToNormalizedFloat(
count, count,
(s, d) => SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(s.Span, d.Span)); (s, d) => SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(s.Span, d.Span));
} }
[Theory] [Theory]
@ -210,7 +210,7 @@ namespace SixLabors.ImageSharp.Tests.Common
{ {
TestImpl_BulkConvertByteToNormalizedFloat( TestImpl_BulkConvertByteToNormalizedFloat(
count, count,
(s, d) => SimdUtils.BulkConvertByteToNormalizedFloat(s.Span, d.Span)); (s, d) => SimdUtils.ByteToNormalizedFloat(s.Span, d.Span));
} }
private static void TestImpl_BulkConvertByteToNormalizedFloat( private static void TestImpl_BulkConvertByteToNormalizedFloat(
@ -232,7 +232,7 @@ namespace SixLabors.ImageSharp.Tests.Common
{ {
TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
count, count,
(s, d) => SimdUtils.FallbackIntrinsics128.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span)); (s, d) => SimdUtils.FallbackIntrinsics128.NormalizedFloatToByteSaturate(s.Span, d.Span));
} }
[Theory] [Theory]
@ -244,7 +244,7 @@ namespace SixLabors.ImageSharp.Tests.Common
return; return;
} }
TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, (s, d) => SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span)); TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, (s, d) => SimdUtils.BasicIntrinsics256.NormalizedFloatToByteSaturate(s.Span, d.Span));
} }
[Theory] [Theory]
@ -253,7 +253,7 @@ namespace SixLabors.ImageSharp.Tests.Common
{ {
TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
count, count,
(s, d) => SimdUtils.ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span)); (s, d) => SimdUtils.ExtendedIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span));
} }
[Theory] [Theory]
@ -277,11 +277,29 @@ namespace SixLabors.ImageSharp.Tests.Common
Assert.Equal(expected2, actual2); Assert.Equal(expected2, actual2);
} }
#if SUPPORTS_RUNTIME_INTRINSICS
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy32))]
public void Avx2_BulkConvertNormalizedFloatToByteClampOverflows(int count)
{
if (!System.Runtime.Intrinsics.X86.Avx2.IsSupported)
{
return;
}
TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
count,
(s, d) => SimdUtils.Avx2Intrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span));
}
#endif
[Theory] [Theory]
[MemberData(nameof(ArbitraryArraySizes))] [MemberData(nameof(ArbitraryArraySizes))]
public void BulkConvertNormalizedFloatToByteClampOverflows(int count) public void BulkConvertNormalizedFloatToByteClampOverflows(int count)
{ {
TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, (s, d) => SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span)); TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, (s, d) => SimdUtils.NormalizedFloatToByteSaturate(s.Span, d.Span));
// For small values, let's stress test the implementation a bit: // For small values, let's stress test the implementation a bit:
if (count > 0 && count < 10) if (count > 0 && count < 10)
@ -290,7 +308,7 @@ namespace SixLabors.ImageSharp.Tests.Common
{ {
TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
count, count,
(s, d) => SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span), (s, d) => SimdUtils.NormalizedFloatToByteSaturate(s.Span, d.Span),
i + 42); i + 42);
} }
} }

4
tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.CopyToBufferArea.cs

@ -44,7 +44,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
using (Buffer2D<float> buffer = Configuration.Default.MemoryAllocator.Allocate2D<float>(20, 20, AllocationOptions.Clean)) using (Buffer2D<float> buffer = Configuration.Default.MemoryAllocator.Allocate2D<float>(20, 20, AllocationOptions.Clean))
{ {
BufferArea<float> area = buffer.GetArea(5, 10, 8, 8); BufferArea<float> area = buffer.GetArea(5, 10, 8, 8);
block.Copy1x1Scale(area); block.Copy1x1Scale(ref area.GetReferenceToOrigin(), area.Stride);
Assert.Equal(block[0, 0], buffer[5, 10]); Assert.Equal(block[0, 0], buffer[5, 10]);
Assert.Equal(block[1, 0], buffer[6, 10]); Assert.Equal(block[1, 0], buffer[6, 10]);
@ -72,7 +72,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
using (Buffer2D<float> buffer = Configuration.Default.MemoryAllocator.Allocate2D<float>(100, 100, AllocationOptions.Clean)) using (Buffer2D<float> buffer = Configuration.Default.MemoryAllocator.Allocate2D<float>(100, 100, AllocationOptions.Clean))
{ {
BufferArea<float> area = buffer.GetArea(start.X, start.Y, 8 * horizontalFactor, 8 * verticalFactor); BufferArea<float> area = buffer.GetArea(start.X, start.Y, 8 * horizontalFactor, 8 * verticalFactor);
block.CopyTo(area, horizontalFactor, verticalFactor); block.ScaledCopyTo(area, horizontalFactor, verticalFactor);
for (int y = 0; y < 8 * verticalFactor; y++) for (int y = 0; y < 8 * verticalFactor; y++)
{ {

14
tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

@ -29,7 +29,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
private bool SkipOnNonAvx2Runner() private bool SkipOnNonAvx2Runner()
{ {
if (!SimdUtils.IsAvx2CompatibleArchitecture) if (!SimdUtils.HasVector8)
{ {
this.Output.WriteLine("AVX2 not supported, skipping!"); this.Output.WriteLine("AVX2 not supported, skipping!");
return true; return true;
@ -104,7 +104,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{ {
var b = default(Block8x8F); var b = default(Block8x8F);
b.LoadFrom(data); b.LoadFrom(data);
b.CopyTo(mirror); b.ScaledCopyTo(mirror);
}); });
Assert.Equal(data, mirror); Assert.Equal(data, mirror);
@ -129,7 +129,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{ {
var b = default(Block8x8F); var b = default(Block8x8F);
Block8x8F.LoadFrom(&b, data); Block8x8F.LoadFrom(&b, data);
Block8x8F.CopyTo(&b, mirror); Block8x8F.ScaledCopyTo(&b, mirror);
}); });
Assert.Equal(data, mirror); Assert.Equal(data, mirror);
@ -154,7 +154,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{ {
var v = default(Block8x8F); var v = default(Block8x8F);
v.LoadFrom(data); v.LoadFrom(data);
v.CopyTo(mirror); v.ScaledCopyTo(mirror);
}); });
Assert.Equal(data, mirror); Assert.Equal(data, mirror);
@ -175,7 +175,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
source.TransposeInto(ref dest); source.TransposeInto(ref dest);
float[] actual = new float[64]; float[] actual = new float[64];
dest.CopyTo(actual); dest.ScaledCopyTo(actual);
Assert.Equal(expected, actual); Assert.Equal(expected, actual);
} }
@ -231,7 +231,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
dest.NormalizeColorsInplace(255); dest.NormalizeColorsInplace(255);
float[] array = new float[64]; float[] array = new float[64];
dest.CopyTo(array); dest.ScaledCopyTo(array);
this.Output.WriteLine("Result:"); this.Output.WriteLine("Result:");
this.PrintLinearData(array); this.PrintLinearData(array);
foreach (float val in array) foreach (float val in array)
@ -257,7 +257,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
expected.RoundInplace(); expected.RoundInplace();
Block8x8F actual = source; Block8x8F actual = source;
actual.NormalizeColorsAndRoundInplaceAvx2(255); actual.NormalizeColorsAndRoundInplaceVector8(255);
this.Output.WriteLine(expected.ToString()); this.Output.WriteLine(expected.ToString());
this.Output.WriteLine(actual.ToString()); this.Output.WriteLine(actual.ToString());

10
tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs

@ -37,7 +37,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
FastFloatingPointDCT.IDCT8x4_LeftPart(ref source, ref dest); FastFloatingPointDCT.IDCT8x4_LeftPart(ref source, ref dest);
var actualDestArray = new float[64]; var actualDestArray = new float[64];
dest.CopyTo(actualDestArray); dest.ScaledCopyTo(actualDestArray);
this.Print8x8Data(expectedDestArray); this.Print8x8Data(expectedDestArray);
this.Output.WriteLine("**************"); this.Output.WriteLine("**************");
@ -62,7 +62,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
FastFloatingPointDCT.IDCT8x4_RightPart(ref source, ref dest); FastFloatingPointDCT.IDCT8x4_RightPart(ref source, ref dest);
var actualDestArray = new float[64]; var actualDestArray = new float[64];
dest.CopyTo(actualDestArray); dest.ScaledCopyTo(actualDestArray);
this.Print8x8Data(expectedDestArray); this.Print8x8Data(expectedDestArray);
this.Output.WriteLine("**************"); this.Output.WriteLine("**************");
@ -126,7 +126,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
FastFloatingPointDCT.FDCT8x4_LeftPart(ref srcBlock, ref destBlock); FastFloatingPointDCT.FDCT8x4_LeftPart(ref srcBlock, ref destBlock);
var actualDest = new float[64]; var actualDest = new float[64];
destBlock.CopyTo(actualDest); destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
} }
@ -148,7 +148,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
FastFloatingPointDCT.FDCT8x4_RightPart(ref srcBlock, ref destBlock); FastFloatingPointDCT.FDCT8x4_RightPart(ref srcBlock, ref destBlock);
var actualDest = new float[64]; var actualDest = new float[64];
destBlock.CopyTo(actualDest); destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
} }
@ -172,7 +172,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
FastFloatingPointDCT.TransformFDCT(ref srcBlock, ref destBlock, ref temp2, false); FastFloatingPointDCT.TransformFDCT(ref srcBlock, ref destBlock, ref temp2, false);
var actualDest = new float[64]; var actualDest = new float[64];
destBlock.CopyTo(actualDest); destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
} }

4
tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs

@ -99,7 +99,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[MemberData(nameof(CommonConversionData))] [MemberData(nameof(CommonConversionData))]
public void FromYCbCrSimdAvx2(int inputBufferLength, int resultBufferLength, int seed) public void FromYCbCrSimdAvx2(int inputBufferLength, int resultBufferLength, int seed)
{ {
if (!SimdUtils.IsAvx2CompatibleArchitecture) if (!SimdUtils.HasVector8)
{ {
this.Output.WriteLine("No AVX2 present, skipping test!"); this.Output.WriteLine("No AVX2 present, skipping test!");
return; return;
@ -107,7 +107,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
// JpegColorConverter.FromYCbCrSimdAvx2.LogPlz = s => this.Output.WriteLine(s); // JpegColorConverter.FromYCbCrSimdAvx2.LogPlz = s => this.Output.WriteLine(s);
ValidateRgbToYCbCrConversion( ValidateRgbToYCbCrConversion(
new JpegColorConverter.FromYCbCrSimdAvx2(8), new JpegColorConverter.FromYCbCrSimdVector8(8),
3, 3,
inputBufferLength, inputBufferLength,
resultBufferLength, resultBufferLength,

4
tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.LLM_FloatingPoint_DCT.cs

@ -33,7 +33,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
public static Block8x8F TransformIDCT(ref Block8x8F source) public static Block8x8F TransformIDCT(ref Block8x8F source)
{ {
float[] s = new float[64]; float[] s = new float[64];
source.CopyTo(s); source.ScaledCopyTo(s);
float[] d = new float[64]; float[] d = new float[64];
float[] temp = new float[64]; float[] temp = new float[64];
@ -46,7 +46,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
public static Block8x8F TransformFDCT_UpscaleBy8(ref Block8x8F source) public static Block8x8F TransformFDCT_UpscaleBy8(ref Block8x8F source)
{ {
float[] s = new float[64]; float[] s = new float[64];
source.CopyTo(s); source.ScaledCopyTo(s);
float[] d = new float[64]; float[] d = new float[64];
float[] temp = new float[64]; float[] temp = new float[64];

23
tests/ImageSharp.Tests/Processing/Processors/Convolution/DetectEdgesTest.cs

@ -12,9 +12,9 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution
[GroupOutput("Convolution")] [GroupOutput("Convolution")]
public class DetectEdgesTest public class DetectEdgesTest
{ {
// I think our comparison is not accurate enough (nor can be) for RgbaVector. private static readonly ImageComparer OpaqueComparer = ImageComparer.TolerantPercentage(0.01F);
// The image pixels are identical according to BeyondCompare.
private static readonly ImageComparer ValidatorComparer = ImageComparer.TolerantPercentage(0.0456F); private static readonly ImageComparer TransparentComparer = ImageComparer.TolerantPercentage(0.5F);
public static readonly string[] TestImages = { Tests.TestImages.Png.Bike }; public static readonly string[] TestImages = { Tests.TestImages.Png.Bike };
@ -46,7 +46,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution
var bounds = new Rectangle(10, 10, size.Width / 2, size.Height / 2); var bounds = new Rectangle(10, 10, size.Width / 2, size.Height / 2);
ctx.DetectEdges(bounds); ctx.DetectEdges(bounds);
}, },
comparer: ValidatorComparer, comparer: OpaqueComparer,
useReferenceOutputFrom: nameof(this.DetectEdges_InBox)); useReferenceOutputFrom: nameof(this.DetectEdges_InBox));
} }
@ -56,11 +56,13 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution
public void DetectEdges_WorksWithAllFilters<TPixel>(TestImageProvider<TPixel> provider, EdgeDetectionOperators detector) public void DetectEdges_WorksWithAllFilters<TPixel>(TestImageProvider<TPixel> provider, EdgeDetectionOperators detector)
where TPixel : unmanaged, IPixel<TPixel> where TPixel : unmanaged, IPixel<TPixel>
{ {
bool hasAlpha = provider.SourceFileOrDescription.Contains("TestPattern");
ImageComparer comparer = hasAlpha ? TransparentComparer : OpaqueComparer;
using (Image<TPixel> image = provider.GetImage()) using (Image<TPixel> image = provider.GetImage())
{ {
image.Mutate(x => x.DetectEdges(detector)); image.Mutate(x => x.DetectEdges(detector));
image.DebugSave(provider, detector.ToString()); image.DebugSave(provider, detector.ToString());
image.CompareToReferenceOutput(ValidatorComparer, provider, detector.ToString()); image.CompareToReferenceOutput(comparer, provider, detector.ToString());
} }
} }
@ -69,11 +71,18 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution
public void DetectEdges_IsNotBoundToSinglePixelType<TPixel>(TestImageProvider<TPixel> provider) public void DetectEdges_IsNotBoundToSinglePixelType<TPixel>(TestImageProvider<TPixel> provider)
where TPixel : unmanaged, IPixel<TPixel> where TPixel : unmanaged, IPixel<TPixel>
{ {
// James:
// I think our comparison is not accurate enough (nor can be) for RgbaVector.
// The image pixels are identical according to BeyondCompare.
ImageComparer comparer = typeof(TPixel) == typeof(RgbaVector) ?
ImageComparer.TolerantPercentage(1f) :
OpaqueComparer;
using (Image<TPixel> image = provider.GetImage()) using (Image<TPixel> image = provider.GetImage())
{ {
image.Mutate(x => x.DetectEdges()); image.Mutate(x => x.DetectEdges());
image.DebugSave(provider); image.DebugSave(provider);
image.CompareToReferenceOutput(ValidatorComparer, provider); image.CompareToReferenceOutput(comparer, provider);
} }
} }
@ -100,7 +109,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution
image.Mutate(x => x.DetectEdges(bounds)); image.Mutate(x => x.DetectEdges(bounds));
image.DebugSave(provider); image.DebugSave(provider);
image.CompareToReferenceOutput(ValidatorComparer, provider); image.CompareToReferenceOutput(OpaqueComparer, provider);
} }
} }

13
tests/ImageSharp.Tests/Processing/Processors/Effects/OilPaintTest.cs

@ -3,7 +3,7 @@
using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing; using SixLabors.ImageSharp.Processing;
using SixLabors.ImageSharp.Tests.TestUtilities.ImageComparison;
using Xunit; using Xunit;
namespace SixLabors.ImageSharp.Tests.Processing.Processors.Effects namespace SixLabors.ImageSharp.Tests.Processing.Processors.Effects
@ -29,8 +29,12 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Effects
where TPixel : unmanaged, IPixel<TPixel> where TPixel : unmanaged, IPixel<TPixel>
{ {
provider.RunValidatingProcessorTest( provider.RunValidatingProcessorTest(
x => x.OilPaint(levels, brushSize), x =>
$"{levels}-{brushSize}", {
x.OilPaint(levels, brushSize);
return $"{levels}-{brushSize}";
},
ImageComparer.TolerantPercentage(0.01F),
appendPixelTypeToFileName: false); appendPixelTypeToFileName: false);
} }
@ -42,7 +46,8 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Effects
{ {
provider.RunRectangleConstrainedValidatingProcessorTest( provider.RunRectangleConstrainedValidatingProcessorTest(
(x, rect) => x.OilPaint(levels, brushSize, rect), (x, rect) => x.OilPaint(levels, brushSize, rect),
$"{levels}-{brushSize}"); $"{levels}-{brushSize}",
ImageComparer.TolerantPercentage(0.01F));
} }
} }
} }

2
tests/ImageSharp.Tests/Processing/Processors/Quantization/QuantizerTests.cs

@ -19,7 +19,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Quantization
/// Not worth investigating for now. /// Not worth investigating for now.
/// <see href="https://github.com/SixLabors/ImageSharp/pull/1114/checks?check_run_id=448891164#step:11:631"/> /// <see href="https://github.com/SixLabors/ImageSharp/pull/1114/checks?check_run_id=448891164#step:11:631"/>
/// </summary> /// </summary>
private static readonly bool SkipAllQuantizerTests = TestEnvironment.RunsOnCI && TestEnvironment.IsFramework; private static readonly bool SkipAllQuantizerTests = TestEnvironment.IsFramework;
public static readonly string[] CommonTestImages = public static readonly string[] CommonTestImages =
{ {

12
tests/ImageSharp.Tests/Processing/Processors/Transforms/EntropyCropTest.cs

@ -1,6 +1,7 @@
// Copyright (c) Six Labors and contributors. // Copyright (c) Six Labors and contributors.
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System;
using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing; using SixLabors.ImageSharp.Processing;
using Xunit; using Xunit;
@ -24,7 +25,16 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Transforms
public void EntropyCrop<TPixel>(TestImageProvider<TPixel> provider, float value) public void EntropyCrop<TPixel>(TestImageProvider<TPixel> provider, float value)
where TPixel : unmanaged, IPixel<TPixel> where TPixel : unmanaged, IPixel<TPixel>
{ {
// The result dimensions of EntropyCrop may differ on .NET Core 3.1 because of unstable edge detection results.
// TODO: Re-enable this test case if we manage to improve stability.
#if SUPPORTS_RUNTIME_INTRINSICS
if (provider.SourceFileOrDescription.Contains(TestImages.Png.Ducky))
{
return;
}
#endif
provider.RunValidatingProcessorTest(x => x.EntropyCrop(value), value, appendPixelTypeToFileName: false); provider.RunValidatingProcessorTest(x => x.EntropyCrop(value), value, appendPixelTypeToFileName: false);
} }
} }
} }

31
tests/ImageSharp.Tests/ProfilingBenchmarks/JpegProfilingBenchmarks.cs

@ -13,6 +13,9 @@ using SixLabors.ImageSharp.PixelFormats;
using Xunit; using Xunit;
using Xunit.Abstractions; using Xunit.Abstractions;
// in this file, comments are used for disabling stuff for local execution
#pragma warning disable SA1515
namespace SixLabors.ImageSharp.Tests.ProfilingBenchmarks namespace SixLabors.ImageSharp.Tests.ProfilingBenchmarks
{ {
public class JpegProfilingBenchmarks : MeasureFixture public class JpegProfilingBenchmarks : MeasureFixture
@ -22,24 +25,28 @@ namespace SixLabors.ImageSharp.Tests.ProfilingBenchmarks
{ {
} }
public static readonly TheoryData<string> DecodeJpegData = new TheoryData<string> public static readonly TheoryData<string, int> DecodeJpegData = new TheoryData<string, int>
{ {
TestImages.Jpeg.BenchmarkSuite.Jpeg400_SmallMonochrome, { TestImages.Jpeg.BenchmarkSuite.Jpeg400_SmallMonochrome, 20 },
TestImages.Jpeg.BenchmarkSuite.Jpeg420Exif_MidSizeYCbCr, { TestImages.Jpeg.BenchmarkSuite.Jpeg420Exif_MidSizeYCbCr, 20 },
TestImages.Jpeg.BenchmarkSuite.Lake_Small444YCbCr, { TestImages.Jpeg.BenchmarkSuite.Lake_Small444YCbCr, 40 },
TestImages.Jpeg.BenchmarkSuite.MissingFF00ProgressiveBedroom159_MidSize420YCbCr, // { TestImages.Jpeg.BenchmarkSuite.MissingFF00ProgressiveBedroom159_MidSize420YCbCr, 10 },
TestImages.Jpeg.BenchmarkSuite.BadRstProgressive518_Large444YCbCr, // { TestImages.Jpeg.BenchmarkSuite.BadRstProgressive518_Large444YCbCr, 5 },
TestImages.Jpeg.BenchmarkSuite.ExifGetString750Transform_Huge420YCbCr, { TestImages.Jpeg.BenchmarkSuite.ExifGetString750Transform_Huge420YCbCr, 5 }
}; };
[Theory(Skip = ProfilingSetup.SkipProfilingTests)] [Theory(Skip = ProfilingSetup.SkipProfilingTests)]
[MemberData(nameof(DecodeJpegData))] [MemberData(nameof(DecodeJpegData))]
public void DecodeJpeg(string fileName) public void DecodeJpeg(string fileName, int executionCount)
{ {
this.DecodeJpegBenchmarkImpl(fileName, new JpegDecoder()); var decoder = new JpegDecoder()
{
IgnoreMetadata = true
};
this.DecodeJpegBenchmarkImpl(fileName, decoder, executionCount);
} }
private void DecodeJpegBenchmarkImpl(string fileName, IImageDecoder decoder) private void DecodeJpegBenchmarkImpl(string fileName, IImageDecoder decoder, int executionCount)
{ {
// do not run this on CI even by accident // do not run this on CI even by accident
if (TestEnvironment.RunsOnCI) if (TestEnvironment.RunsOnCI)
@ -47,8 +54,6 @@ namespace SixLabors.ImageSharp.Tests.ProfilingBenchmarks
return; return;
} }
const int ExecutionCount = 20;
if (!Vector.IsHardwareAccelerated) if (!Vector.IsHardwareAccelerated)
{ {
throw new Exception("Vector.IsHardwareAccelerated == false! ('prefer32 bit' enabled?)"); throw new Exception("Vector.IsHardwareAccelerated == false! ('prefer32 bit' enabled?)");
@ -58,7 +63,7 @@ namespace SixLabors.ImageSharp.Tests.ProfilingBenchmarks
byte[] bytes = File.ReadAllBytes(path); byte[] bytes = File.ReadAllBytes(path);
this.Measure( this.Measure(
ExecutionCount, executionCount,
() => () =>
{ {
var img = Image.Load<Rgba32>(bytes, decoder); var img = Image.Load<Rgba32>(bytes, decoder);

Loading…
Cancel
Save