diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs b/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
index e4dc1a1d8..a8b343498 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
@@ -46,53 +46,6 @@ namespace SixLabors.ImageSharp
}
}
- ///
- /// Convert 'source.Length' values normalized into [0..1] from 'source'
- /// into 'dest' buffer of . The values are scaled up into [0-255] and rounded.
- /// The implementation is SIMD optimized and works only with `source.Length` divisible by 8/>.
- /// Based on:
- ///
- /// http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions
- ///
- ///
- internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan source, Span dest)
- {
- GuardAvx2(nameof(BulkConvertNormalizedFloatToByte));
-
- DebugGuard.IsTrue((source.Length % Vector.Count) == 0, nameof(source), "source.Length should be divisable by Vector.Count!");
-
- if (source.Length == 0)
- {
- return;
- }
-
- ref Vector srcBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
- ref Octet.OfByte destBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
- int n = source.Length / 8;
-
- Vector magick = new Vector(32768.0f);
- Vector scale = new Vector(255f) / new Vector(256f);
-
- // need to copy to a temporary struct, because
- // SimdUtils.Octet.OfUInt32 temp = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x)
- // does not work. TODO: This might be a CoreClr bug, need to ask/report
- var temp = default(Octet.OfUInt32);
- ref Vector tempRef = ref Unsafe.As>(ref temp);
-
- for (int i = 0; i < n; i++)
- {
- // union { float f; uint32_t i; } u;
- // u.f = 32768.0f + x * (255.0f / 256.0f);
- // return (uint8_t)u.i;
- Vector x = Unsafe.Add(ref srcBase, i);
- x = (x * scale) + magick;
- tempRef = x;
-
- ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
- d.LoadFrom(ref temp);
- }
- }
-
///
/// SIMD optimized implementation for .
/// Works only with `dest.Length` divisible by 8.
@@ -165,7 +118,7 @@ namespace SixLabors.ImageSharp
}
///
- /// Same as but clamps overflown values before conversion.
+ /// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> which is faster on older runtimes.
///
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan source, Span dest)
{
@@ -207,6 +160,53 @@ namespace SixLabors.ImageSharp
d.LoadFrom(ref temp);
}
}
+
+ /// <summary>
+ /// Converts the <see cref="float"/> values in 'source', normalized into [0..1], into <see cref="byte"/> values
+ /// written to 'dest'. The values are scaled up into [0-255] and rounded; inputs outside [0..1] are not clamped.
+ /// The implementation is SIMD optimized and works only with `source.Length` divisible by 8. Based on:
+ /// <see href="http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions"/>
+ /// </summary>
+ /// <param name="source">The source span of floats</param>
+ /// <param name="dest">The destination span of bytes; must hold at least 'source.Length' elements</param>
+ /// <exception cref="NotSupportedException">Thrown when <see cref="SimdUtils.IsAvx2CompatibleArchitecture"/> is false.</exception>
+ internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan<float> source, Span<byte> dest)
+ {
+     GuardAvx2(nameof(BulkConvertNormalizedFloatToByte));
+
+     DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector.Count!");
+     DebugGuard.IsTrue(dest.Length >= source.Length, nameof(dest), "dest should be at least as long as source!");
+
+     if (source.Length == 0)
+     {
+         return;
+     }
+
+     ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
+     ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
+     int n = source.Length / 8;
+
+     Vector<float> magick = new Vector<float>(32768.0f);
+     Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
+
+     // need to copy to a temporary struct, because the direct reinterpretation
+     // Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x) does not work. TODO: This might be a CoreClr bug, need to ask/report
+     var temp = default(Octet.OfUInt32);
+     ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);
+
+     for (int i = 0; i < n; i++)
+     {
+         // C equivalent: union { float f; uint32_t i; } u; u.f = 32768.0f + x * (255.0f / 256.0f); return (uint8_t)u.i;
+         // Floats in [32768, 65536) are spaced 1/256 apart, so the addition leaves round(x * 255) in the low
+         // 8 bits of each 32-bit lane; LoadFrom is expected to perform the (uint8_t) narrowing step.
+         Vector<float> x = Unsafe.Add(ref srcBase, i);
+         x = (x * scale) + magick;
+         tempRef = x;
+
+         ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
+         d.LoadFrom(ref temp);
+     }
+ }
}
}
}
\ No newline at end of file
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
index 5c0b8ee93..fd263b54c 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
@@ -52,9 +52,8 @@ namespace SixLabors.ImageSharp
}
///
- /// A variant of , which is faster on new RyuJIT runtime.
+ /// Implementation of <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
///
- // ReSharper disable once MemberHidesStaticFromOuterClass
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan source, Span dest)
{
DebugGuard.IsTrue(
@@ -116,13 +115,8 @@ namespace SixLabors.ImageSharp
}
///
- /// A variant of , which is faster on new .NET runtime.
+ /// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/>, which is faster on new .NET runtime.
///
- ///
- /// It does NOT worth yet to utilize this method (2018 Oct).
- /// See benchmark results for the "PackFromVector4_Rgba32" benchmark!
- /// TODO: Check again later!
- ///
internal static void BulkConvertNormalizedFloatToByteClampOverflows(
ReadOnlySpan source,
Span dest)
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs
index 73e9bacfa..111ac2240 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs
@@ -22,17 +22,10 @@ namespace SixLabors.ImageSharp
public static bool IsAvx2CompatibleArchitecture { get; } =
Vector.IsHardwareAccelerated && Vector.Count == 8 && Vector.Count == 8;
- internal static void GuardAvx2(string operation)
- {
- if (!IsAvx2CompatibleArchitecture)
- {
- throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!");
- }
- }
-
///
/// Transform all scalars in 'v' in a way that converting them to would have rounding semantics.
///
+ /// <param name="v">The vector</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector4 PseudoRound(this Vector4 v)
{
@@ -48,14 +41,15 @@ namespace SixLabors.ImageSharp
/// https://github.com/g-truc/glm/blob/master/glm/simd/common.h#L110
///
///
+ /// <param name="v">The vector</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal static Vector FastRound(this Vector x)
+ internal static Vector FastRound(this Vector v)
{
Vector magic0 = new Vector(int.MinValue); // 0x80000000
Vector sgn0 = Vector.AsVectorSingle(magic0);
- Vector and0 = Vector.BitwiseAnd(sgn0, x);
+ Vector and0 = Vector.BitwiseAnd(sgn0, v);
Vector or0 = Vector.BitwiseOr(and0, new Vector(8388608.0f));
- Vector add0 = Vector.Add(x, or0);
+ Vector add0 = Vector.Add(v, or0);
Vector sub0 = Vector.Subtract(add0, or0);
return sub0;
}
@@ -65,6 +59,8 @@ namespace SixLabors.ImageSharp
/// should be the of the same size as ,
/// but there are no restrictions on the span's length.
///
+ /// <param name="source">The source span of bytes</param>
+ /// <param name="dest">The destination span of floats</param>
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan source, Span dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
@@ -92,6 +88,8 @@ namespace SixLabors.ImageSharp
/// should be the of the same size as ,
/// but there are no restrictions on the span's length.
///
+ /// <param name="source">The source span of floats</param>
+ /// <param name="dest">The destination span of bytes</param>
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan source, Span dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
@@ -119,5 +117,13 @@ namespace SixLabors.ImageSharp
}
}
}
+
+ private static void GuardAvx2(string operation) // Throws NotSupportedException unless IsAvx2CompatibleArchitecture is true.
+ {
+ if (!IsAvx2CompatibleArchitecture)
+ {
+ throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!"); // callers pass nameof(...) of the guarded method, e.g. BulkConvertNormalizedFloatToByte
+ }
+ }
}
}
\ No newline at end of file