diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs b/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
index e4dc1a1d8..a8b343498 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
@@ -46,53 +46,6 @@ namespace SixLabors.ImageSharp
                 }
             }
 
-            /// <summary>
-            /// Convert 'source.Length' <see cref="float"/> values normalized into [0..1] from 'source'
-            /// into 'dest' buffer of <see cref="byte"/>. The values are scaled up into [0-255] and rounded.
-            /// The implementation is SIMD optimized and works only with `source.Length` divisible by 8/>.
-            /// Based on:
-            /// <see>
-            ///     <cref>http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions</cref>
-            /// </see>
-            /// </summary>
-            internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan<float> source, Span<byte> dest)
-            {
-                GuardAvx2(nameof(BulkConvertNormalizedFloatToByte));
-
-                DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector<float>.Count!");
-
-                if (source.Length == 0)
-                {
-                    return;
-                }
-
-                ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
-                ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
-                int n = source.Length / 8;
-
-                Vector<float> magick = new Vector<float>(32768.0f);
-                Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
-
-                // need to copy to a temporary struct, because
-                // SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
-                // does not work. TODO: This might be a CoreClr bug, need to ask/report
-                var temp = default(Octet.OfUInt32);
-                ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);
-
-                for (int i = 0; i < n; i++)
-                {
-                    // union { float f; uint32_t i; } u;
-                    // u.f = 32768.0f + x * (255.0f / 256.0f);
-                    // return (uint8_t)u.i;
-                    Vector<float> x = Unsafe.Add(ref srcBase, i);
-                    x = (x * scale) + magick;
-                    tempRef = x;
-
-                    ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
-                    d.LoadFrom(ref temp);
-                }
-            }
-
             /// <summary>
             /// SIMD optimized implementation for <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/>.
             /// Works only with `dest.Length` divisible by 8.
@@ -165,7 +118,7 @@ namespace SixLabors.ImageSharp
             }
 
             /// <summary>
-            /// Same as <see cref="BulkConvertNormalizedFloatToByte"/> but clamps overflown values before conversion.
+            /// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> which is faster on older runtimes.
             /// </summary>
             internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
             {
@@ -207,6 +160,53 @@ namespace SixLabors.ImageSharp
                     d.LoadFrom(ref temp);
                 }
             }
+
+            /// <summary>
+            /// Convert 'source.Length' <see cref="float"/> values normalized into [0..1] from 'source'
+            /// into 'dest' buffer of <see cref="byte"/>. The values are scaled up into [0..255] and rounded.
+            /// The implementation is SIMD optimized and works only with `source.Length` divisible by 8.
+            /// Based on:
+            /// <see>
+            ///     <cref>http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions</cref>
+            /// </see>
+            /// </summary>
+            internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan<float> source, Span<byte> dest)
+            {
+                GuardAvx2(nameof(BulkConvertNormalizedFloatToByte));
+
+                DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector<float>.Count!");
+
+                if (source.Length == 0)
+                {
+                    return;
+                }
+
+                ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
+                ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
+                int n = source.Length / 8;
+
+                Vector<float> magick = new Vector<float>(32768.0f);
+                Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
+
+                // need to copy to a temporary struct, because
+                // SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
+                // does not work. TODO: This might be a CoreClr bug, need to ask/report
+                var temp = default(Octet.OfUInt32);
+                ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);
+
+                for (int i = 0; i < n; i++)
+                {
+                    // union { float f; uint32_t i; } u;
+                    // u.f = 32768.0f + x * (255.0f / 256.0f);
+                    // return (uint8_t)u.i;
+                    Vector<float> x = Unsafe.Add(ref srcBase, i);
+                    x = (x * scale) + magick;
+                    tempRef = x;
+
+                    ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
+                    d.LoadFrom(ref temp);
+                }
+            }
         }
     }
 }
\ No newline at end of file
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
index 5c0b8ee93..fd263b54c 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
@@ -52,9 +52,8 @@ namespace SixLabors.ImageSharp
             }
 
             /// <summary>
-            /// A variant of <see cref="BasicIntrinsics256.BulkConvertByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
+            /// Implementation of <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
             /// </summary>
-            // ReSharper disable once MemberHidesStaticFromOuterClass
             internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
             {
                 DebugGuard.IsTrue(
@@ -116,13 +115,8 @@ namespace SixLabors.ImageSharp
             }
 
             /// <summary>
-            /// A variant of <see cref="BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflows"/>, which is faster on new .NET runtime.
+            /// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/>, which is faster on new .NET runtime.
             /// </summary>
-            /// <remarks>
-            /// It does NOT worth yet to utilize this method (2018 Oct).
-            /// See benchmark results for the "PackFromVector4_Rgba32" benchmark!
-            /// TODO: Check again later!
-            /// </remarks>
             internal static void BulkConvertNormalizedFloatToByteClampOverflows(
                 ReadOnlySpan<float> source,
                 Span<byte> dest)
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs
index 73e9bacfa..111ac2240 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs
@@ -22,17 +22,10 @@ namespace SixLabors.ImageSharp
         public static bool IsAvx2CompatibleArchitecture { get; } =
             Vector.IsHardwareAccelerated && Vector<float>.Count == 8 && Vector<int>.Count == 8;
 
-        internal static void GuardAvx2(string operation)
-        {
-            if (!IsAvx2CompatibleArchitecture)
-            {
-                throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!");
-            }
-        }
-
         /// <summary>
         /// Transform all scalars in 'v' in a way that converting them to <see cref="int"/> would have rounding semantics.
         /// </summary>
+        /// <param name="v">The vector</param>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal static Vector4 PseudoRound(this Vector4 v)
         {
@@ -48,14 +41,15 @@ namespace SixLabors.ImageSharp
         ///     <cref>https://github.com/g-truc/glm/blob/master/glm/simd/common.h#L110</cref>
         /// </see>
         /// </summary>
+        /// <param name="v">The vector</param>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal static Vector<float> FastRound(this Vector<float> x)
+        internal static Vector<float> FastRound(this Vector<float> v)
         {
             Vector<int> magic0 = new Vector<int>(int.MinValue); // 0x80000000
             Vector<float> sgn0 = Vector.AsVectorSingle(magic0);
-            Vector<float> and0 = Vector.BitwiseAnd(sgn0, x);
+            Vector<float> and0 = Vector.BitwiseAnd(sgn0, v);
             Vector<float> or0 = Vector.BitwiseOr(and0, new Vector<float>(8388608.0f));
-            Vector<float> add0 = Vector.Add(x, or0);
+            Vector<float> add0 = Vector.Add(v, or0);
             Vector<float> sub0 = Vector.Subtract(add0, or0);
             return sub0;
         }
@@ -65,6 +59,8 @@ namespace SixLabors.ImageSharp
         /// <paramref name="source"/> should be the of the same size as <paramref name="dest"/>,
         /// but there are no restrictions on the span's length.
         /// </summary>
+        /// <param name="source">The source span of bytes</param>
+        /// <param name="dest">The destination span of floats</param>
         internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
         {
             DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
@@ -92,6 +88,8 @@ namespace SixLabors.ImageSharp
         /// <paramref name="source"/> should be the of the same size as <paramref name="dest"/>,
         /// but there are no restrictions on the span's length.
         /// </summary>
+        /// <param name="source">The source span of floats</param>
+        /// <param name="dest">The destination span of bytes</param>
         internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
         {
             DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
@@ -119,5 +117,13 @@ namespace SixLabors.ImageSharp
                 }
             }
         }
+
+        private static void GuardAvx2(string operation)
+        {
+            if (!IsAvx2CompatibleArchitecture)
+            {
+                throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!");
+            }
+        }
     }
 }
\ No newline at end of file