diff --git a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs
index 39c442fe0..cbf164a71 100644
--- a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs
+++ b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs
@@ -29,7 +29,19 @@ namespace SixLabors.ImageSharp.PixelFormats
/// The number of pixels to convert.
internal virtual void PackFromVector4(ReadOnlySpan sourceVectors, Span destinationColors, int count)
{
- PackFromVector4Common(sourceVectors, destinationColors, count);
+ // Guard with the real parameter names so a failure reports an argument the caller actually passed.
+ // The loop below walks both spans through unchecked Unsafe.Add offsets, so validate against count first.
+ GuardSpans(sourceVectors, nameof(sourceVectors), destinationColors, nameof(destinationColors), count);
+
+ ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceVectors);
+ ref TPixel destRef = ref MemoryMarshal.GetReference(destinationColors);
+
+ for (int i = 0; i < count; i++)
+ {
+ ref Vector4 sp = ref Unsafe.Add(ref sourceRef, i);
+ ref TPixel dp = ref Unsafe.Add(ref destRef, i);
+ dp.PackFromVector4(sp);
+ }
}
///
@@ -40,7 +52,19 @@ namespace SixLabors.ImageSharp.PixelFormats
/// The number of pixels to convert.
internal virtual void ToVector4(ReadOnlySpan sourceColors, Span destinationVectors, int count)
{
- ToVector4Common(sourceColors, destinationVectors, count);
+ // Guard with the real parameter names so a failure reports an argument the caller actually passed.
+ // The loop below walks both spans through unchecked Unsafe.Add offsets, so validate against count first.
+ GuardSpans(sourceColors, nameof(sourceColors), destinationVectors, nameof(destinationVectors), count);
+
+ ref TPixel sourceRef = ref MemoryMarshal.GetReference(sourceColors);
+ ref Vector4 destRef = ref MemoryMarshal.GetReference(destinationVectors);
+
+ for (int i = 0; i < count; i++)
+ {
+ ref TPixel sp = ref Unsafe.Add(ref sourceRef, i);
+ ref Vector4 dp = ref Unsafe.Add(ref destRef, i);
+ dp = sp.ToVector4();
+ }
}
///
@@ -106,37 +130,5 @@ namespace SixLabors.ImageSharp.PixelFormats
Guard.MustBeSizedAtLeast(source, minLength, sourceParamName);
Guard.MustBeSizedAtLeast(destination, minLength, destinationParamName);
}
-
- [MethodImpl(InliningOptions.ShortMethod)]
- internal static void PackFromVector4Common(ReadOnlySpan sourceVectors, Span destinationColors, int count)
- {
- GuardSpans(sourceVectors, nameof(sourceVectors), destinationColors, nameof(destinationColors), count);
-
- ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceVectors);
- ref TPixel destRef = ref MemoryMarshal.GetReference(destinationColors);
-
- for (int i = 0; i < count; i++)
- {
- ref Vector4 sp = ref Unsafe.Add(ref sourceRef, i);
- ref TPixel dp = ref Unsafe.Add(ref destRef, i);
- dp.PackFromVector4(sp);
- }
- }
-
- [MethodImpl(InliningOptions.ShortMethod)]
- internal static void ToVector4Common(ReadOnlySpan sourceColors, Span destinationVectors, int count)
- {
- GuardSpans(sourceColors, nameof(sourceColors), destinationVectors, nameof(destinationVectors), count);
-
- ref TPixel sourceRef = ref MemoryMarshal.GetReference(sourceColors);
- ref Vector4 destRef = ref MemoryMarshal.GetReference(destinationVectors);
-
- for (int i = 0; i < count; i++)
- {
- ref TPixel sp = ref Unsafe.Add(ref sourceRef, i);
- ref Vector4 dp = ref Unsafe.Add(ref destRef, i);
- dp = sp.ToVector4();
- }
- }
}
}
\ No newline at end of file
diff --git a/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs b/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs
index 564b93ef5..bb42ec7e3 100644
--- a/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs
+++ b/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs
@@ -24,21 +24,12 @@ namespace SixLabors.ImageSharp.PixelFormats
Guard.MustBeSizedAtLeast(sourceColors, count, nameof(sourceColors));
Guard.MustBeSizedAtLeast(destinationVectors, count, nameof(destinationVectors));
- if (count < 128 || !SimdUtils.IsAvx2CompatibleArchitecture)
- {
- // Doesn't worth to bother with SIMD:
- ToVector4Common(sourceColors, destinationVectors, count);
- return;
- }
+ sourceColors = sourceColors.Slice(0, count);
+ destinationVectors = destinationVectors.Slice(0, count);
- if (SimdUtils.ExtendedIntrinsics.IsAvailable)
- {
- ConvertToVector4UsingExtendedIntrinsics(sourceColors, destinationVectors, count);
- }
- else
- {
- ConvertToVector4UsingBasicIntrinsics(sourceColors, destinationVectors, count);
- }
+ SimdUtils.BulkConvertByteToNormalizedFloat(
+ MemoryMarshal.Cast(sourceColors),
+ MemoryMarshal.Cast(destinationVectors));
}
///
@@ -46,20 +37,12 @@ namespace SixLabors.ImageSharp.PixelFormats
{
GuardSpans(sourceVectors, nameof(sourceVectors), destinationColors, nameof(destinationColors), count);
- if (count < 128 || !SimdUtils.IsAvx2CompatibleArchitecture)
- {
- PackFromVector4Common(sourceVectors, destinationColors, count);
- return;
- }
+ sourceVectors = sourceVectors.Slice(0, count);
+ destinationColors = destinationColors.Slice(0, count);
- if (SimdUtils.ExtendedIntrinsics.IsAvailable)
- {
- ConvertFromVector4ExtendedIntrinsics(sourceVectors, destinationColors, count);
- }
- else
- {
- ConvertFromVector4BasicIntrinsics(sourceVectors, destinationColors, count);
- }
+ SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(
+ MemoryMarshal.Cast(sourceVectors),
+ MemoryMarshal.Cast(destinationColors));
}
///
@@ -89,92 +72,6 @@ namespace SixLabors.ImageSharp.PixelFormats
sourcePixels.Slice(0, count).CopyTo(dest);
}
-
- private static void ConvertToVector4UsingExtendedIntrinsics(
- ReadOnlySpan sourceColors,
- Span destinationVectors,
- int count)
- {
- int remainder = count % 8;
- int alignedCount = count - remainder;
-
- if (alignedCount > 0)
- {
- ReadOnlySpan rawSrc = MemoryMarshal.Cast(sourceColors);
- Span rawDest = MemoryMarshal.Cast(destinationVectors.Slice(0, alignedCount));
-
- SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(rawSrc, rawDest);
- }
-
- if (remainder > 0)
- {
- ToVector4Common(sourceColors.Slice(alignedCount), destinationVectors.Slice(alignedCount), remainder);
- }
- }
-
- private static void ConvertToVector4UsingBasicIntrinsics(
- ReadOnlySpan sourceColors,
- Span destinationVectors,
- int count)
- {
- int remainder = count % 2;
- int alignedCount = count - remainder;
-
- if (alignedCount > 0)
- {
- ReadOnlySpan rawSrc = MemoryMarshal.Cast(sourceColors);
- Span rawDest = MemoryMarshal.Cast(destinationVectors.Slice(0, alignedCount));
-
- SimdUtils.BasicIntrinsics256.BulkConvertByteToNormalizedFloat(rawSrc, rawDest);
- }
-
- if (remainder > 0)
- {
- // actually: remainder == 1
- int lastIdx = count - 1;
- destinationVectors[lastIdx] = sourceColors[lastIdx].ToVector4();
- }
- }
-
- private static void ConvertFromVector4ExtendedIntrinsics(ReadOnlySpan sourceVectors, Span destinationColors, int count)
- {
- int remainder = count % 8;
- int alignedCount = count - remainder;
-
- if (alignedCount > 0)
- {
- ReadOnlySpan rawSrc = MemoryMarshal.Cast(sourceVectors);
- Span rawDest = MemoryMarshal.Cast(destinationColors.Slice(0, alignedCount));
-
- SimdUtils.ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflows(rawSrc, rawDest);
- }
-
- if (remainder > 0)
- {
- PackFromVector4Common(sourceVectors.Slice(alignedCount), destinationColors.Slice(alignedCount), remainder);
- }
- }
-
- private static void ConvertFromVector4BasicIntrinsics(ReadOnlySpan sourceVectors, Span destinationColors, int count)
- {
- int remainder = count % 2;
- int alignedCount = count - remainder;
-
- if (alignedCount > 0)
- {
- ReadOnlySpan rawSrc = MemoryMarshal.Cast(sourceVectors.Slice(0, alignedCount));
- Span rawDest = MemoryMarshal.Cast(destinationColors);
-
- SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflows(rawSrc, rawDest);
- }
-
- if (remainder > 0)
- {
- // actually: remainder == 1
- int lastIdx = count - 1;
- destinationColors[lastIdx].PackFromVector4(sourceVectors[lastIdx]);
- }
- }
}
}
}
\ No newline at end of file
diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs
index eb7154955..7a212b052 100644
--- a/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs
+++ b/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs
@@ -25,7 +25,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
protected IMemoryOwner destination;
[Params(
- //64,
+ 64,
2048
)]
public int Count { get; set; }
@@ -72,7 +72,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
public class PackFromVector4_Rgba32 : PackFromVector4
{
[Benchmark]
- public void FastDefault()
+ public void BasicBulk()
{
ref Vector4 sBase = ref this.source.GetSpan()[0];
ref Rgba32 dBase = ref this.destination.GetSpan()[0];
@@ -112,16 +112,31 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
SimdUtils.ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflows(sBytes, dFloats);
}
- // RESULTS:
- // Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Allocated |
- // ----------------------------------------------------------------- |-------- |------ |----------:|----------:|----------:|-------:|---------:|----------:|
- // FastDefault | Clr | 2048 | 15.989 us | 6.1384 us | 0.3468 us | 4.07 | 0.08 | 0 B |
- // BulkConvertNormalizedFloatToByteClampOverflows | Clr | 2048 | 3.931 us | 0.6264 us | 0.0354 us | 1.00 | 0.00 | 0 B |
- // ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Clr | 2048 | 2.100 us | 0.4717 us | 0.0267 us | 0.53 | 0.01 | 0 B |
- //
- // | | | | | | | | |
- // FastDefault | Core | 2048 | 14.693 us | 0.5131 us | 0.0290 us | 3.76 | 0.03 | 0 B |
- // BulkConvertNormalizedFloatToByteClampOverflows | Core | 2048 | 3.913 us | 0.5661 us | 0.0320 us | 1.00 | 0.00 | 0 B |
- // ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Core | 2048 | 1.966 us | 0.4056 us | 0.0229 us | 0.50 | 0.01 | 0 B |
+ // RESULTS (2018 October):
+ // Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated |
+ // ------------------------------------------------------------------ |-------- |------ |-------------:|-------------:|-----------:|-------:|---------:|-------:|----------:|
+ // BasicBulk | Clr | 64 | 581.62 ns | 33.625 ns | 1.8999 ns | 2.27 | 0.02 | - | 0 B |
+ // BasicIntrinsics256_BulkConvertNormalizedFloatToByteClampOverflows | Clr | 64 | 256.66 ns | 45.153 ns | 2.5512 ns | 1.00 | 0.00 | - | 0 B |
+ // ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Clr | 64 | 201.92 ns | 30.161 ns | 1.7042 ns | 0.79 | 0.01 | - | 0 B |
+ // PixelOperations_Base | Clr | 64 | 665.01 ns | 13.032 ns | 0.7363 ns | 2.59 | 0.02 | 0.0067 | 24 B |
+ // PixelOperations_Specialized | Clr | 64 | 295.14 ns | 26.335 ns | 1.4880 ns | 1.15 | 0.01 | - | 0 B |
+ // | | | | | | | | | |
+ // BasicBulk | Core | 64 | 513.22 ns | 91.110 ns | 5.1479 ns | 3.19 | 0.03 | - | 0 B |
+ // BasicIntrinsics256_BulkConvertNormalizedFloatToByteClampOverflows | Core | 64 | 160.76 ns | 2.760 ns | 0.1559 ns | 1.00 | 0.00 | - | 0 B |
+ // ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Core | 64 | 95.98 ns | 10.077 ns | 0.5694 ns | 0.60 | 0.00 | - | 0 B |
+ // PixelOperations_Base | Core | 64 | 591.74 ns | 49.856 ns | 2.8170 ns | 3.68 | 0.01 | 0.0067 | 24 B |
+ // PixelOperations_Specialized | Core | 64 | 149.11 ns | 4.485 ns | 0.2534 ns | 0.93 | 0.00 | - | 0 B |
+ // | | | | | | | | | |
+ // BasicBulk | Clr | 2048 | 15,345.85 ns | 1,213.551 ns | 68.5679 ns | 3.90 | 0.01 | - | 0 B |
+ // BasicIntrinsics256_BulkConvertNormalizedFloatToByteClampOverflows | Clr | 2048 | 3,939.49 ns | 71.101 ns | 4.0173 ns | 1.00 | 0.00 | - | 0 B |
+ // ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Clr | 2048 | 2,272.61 ns | 110.671 ns | 6.2531 ns | 0.58 | 0.00 | - | 0 B |
+ // PixelOperations_Base | Clr | 2048 | 17,422.47 ns | 811.733 ns | 45.8644 ns | 4.42 | 0.01 | - | 24 B |
+ // PixelOperations_Specialized | Clr | 2048 | 3,984.26 ns | 110.352 ns | 6.2351 ns | 1.01 | 0.00 | - | 0 B |
+ // | | | | | | | | | |
+ // BasicBulk | Core | 2048 | 14,950.43 ns | 699.309 ns | 39.5123 ns | 3.76 | 0.02 | - | 0 B |
+ // BasicIntrinsics256_BulkConvertNormalizedFloatToByteClampOverflows | Core | 2048 | 3,978.28 ns | 481.105 ns | 27.1833 ns | 1.00 | 0.00 | - | 0 B |
+ // ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Core | 2048 | 2,169.54 ns | 75.606 ns | 4.2719 ns | !!0.55!| 0.00 | - | 0 B |
+ // PixelOperations_Base | Core | 2048 | 18,403.62 ns | 1,494.056 ns | 84.4169 ns | 4.63 | 0.03 | - | 24 B |
+ // PixelOperations_Specialized | Core | 2048 | 2,227.60 ns | 486.761 ns | 27.5029 ns | !!0.56!| 0.01 | - | 0 B |
}
}
\ No newline at end of file
diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs
index c50c7ce5a..4a801d64e 100644
--- a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs
+++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs
@@ -28,7 +28,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
protected IMemoryOwner destination;
[Params(
- //64,
+ 64,
//256,
//512,
//1024,
@@ -160,7 +160,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
}
}
- [Benchmark]
+ //[Benchmark]
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_ConvertInSameLoop()
{
Span sBytes = MemoryMarshal.Cast(this.source.GetSpan());
@@ -201,5 +201,33 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
v *= scale;
return v;
}
+
+ // RESULTS (2018 October):
+ //
+ // Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated |
+ // ---------------------------------------------------- |-------- |------ |------------:|-------------:|-----------:|-------:|---------:|-------:|----------:|
+ // BasicBulk | Clr | 64 | 267.40 ns | 30.711 ns | 1.7352 ns | 1.07 | 0.01 | - | 0 B |
+ // BasicIntrinsics256_BulkConvertByteToNormalizedFloat | Clr | 64 | 249.97 ns | 33.838 ns | 1.9119 ns | 1.00 | 0.00 | - | 0 B |
+ // ExtendedIntrinsics_BulkConvertByteToNormalizedFloat | Clr | 64 | 176.97 ns | 5.221 ns | 0.2950 ns | 0.71 | 0.00 | - | 0 B |
+ // PixelOperations_Base | Clr | 64 | 349.70 ns | 104.331 ns | 5.8949 ns | 1.40 | 0.02 | 0.0072 | 24 B |
+ // PixelOperations_Specialized | Clr | 64 | 288.31 ns | 26.833 ns | 1.5161 ns | 1.15 | 0.01 | - | 0 B |
+ // | | | | | | | | | |
+ // BasicBulk | Core | 64 | 185.36 ns | 30.051 ns | 1.6979 ns | 1.26 | 0.01 | - | 0 B |
+ // BasicIntrinsics256_BulkConvertByteToNormalizedFloat | Core | 64 | 146.84 ns | 12.674 ns | 0.7161 ns | 1.00 | 0.00 | - | 0 B |
+ // ExtendedIntrinsics_BulkConvertByteToNormalizedFloat | Core | 64 | 67.31 ns | 2.542 ns | 0.1436 ns | 0.46 | 0.00 | - | 0 B |
+ // PixelOperations_Base | Core | 64 | 272.03 ns | 94.419 ns | 5.3348 ns | 1.85 | 0.03 | 0.0072 | 24 B |
+ // PixelOperations_Specialized | Core | 64 | 121.91 ns | 31.477 ns | 1.7785 ns | 0.83 | 0.01 | - | 0 B |
+ // | | | | | | | | | |
+ // BasicBulk | Clr | 2048 | 5,133.04 ns | 284.052 ns | 16.0494 ns | 1.21 | 0.01 | - | 0 B |
+ // BasicIntrinsics256_BulkConvertByteToNormalizedFloat | Clr | 2048 | 4,248.58 ns | 1,095.887 ns | 61.9196 ns | 1.00 | 0.00 | - | 0 B |
+ // ExtendedIntrinsics_BulkConvertByteToNormalizedFloat | Clr | 2048 | 1,214.02 ns | 184.349 ns | 10.4160 ns | 0.29 | 0.00 | - | 0 B |
+ // PixelOperations_Base | Clr | 2048 | 7,096.04 ns | 362.350 ns | 20.4734 ns | 1.67 | 0.02 | - | 24 B |
+ // PixelOperations_Specialized | Clr | 2048 | 4,314.19 ns | 204.964 ns | 11.5809 ns | 1.02 | 0.01 | - | 0 B |
+ // | | | | | | | | | |
+ // BasicBulk | Core | 2048 | 5,038.38 ns | 223.282 ns | 12.6158 ns | 1.20 | 0.01 | - | 0 B |
+ // BasicIntrinsics256_BulkConvertByteToNormalizedFloat | Core | 2048 | 4,199.17 ns | 897.985 ns | 50.7378 ns | 1.00 | 0.00 | - | 0 B |
+ // ExtendedIntrinsics_BulkConvertByteToNormalizedFloat | Core | 2048 | 1,113.86 ns | 64.799 ns | 3.6613 ns | !!0.27!| 0.00 | - | 0 B |
+ // PixelOperations_Base | Core | 2048 | 7,015.00 ns | 920.083 ns | 51.9864 ns | 1.67 | 0.02 | - | 24 B |
+ // PixelOperations_Specialized | Core | 2048 | 1,176.59 ns | 256.955 ns | 14.5184 ns | !!0.28!| 0.00 | - | 0 B |
}
}
\ No newline at end of file