SimdUtils.BulkConvertNormalizedFloatToByte()

9 years ago · ce6fdf9ba8
6 changed files with 275 additions and 14 deletions
--- a/src/ImageSharp/Common/Extensions/SimdUtils.cs
+++ b/src/ImageSharp/Common/Extensions/SimdUtils.cs
@ -7,11 +7,27 @@ using System.Runtime.CompilerServices;

 namespace SixLabors.ImageSharp
 {
+    using System.Diagnostics;
+
    /// <summary>
    /// Various extension and utility methods for <see cref="Vector4"/> and <see cref="Vector{T}"/> utilizing SIMD capabilities
    /// </summary>
    internal static class SimdUtils
    {
+        /// <summary>
+        /// Indicates an AVX2 architecture where both float and integer vector registers are 256 bits wide (8 lanes of 32 bits).
+        /// </summary>
+        public static readonly bool IsAvx2 = Vector<float>.Count == 8 && Vector<int>.Count == 8;
+
+        /// <summary>
+        /// Throws when <see cref="IsAvx2"/> is false.
+        /// Calls to this method are compiled only when the DEBUG symbol is defined.
+        /// </summary>
+        /// <param name="operation">Name of the calling operation, used in the exception message.</param>
+        /// <exception cref="NotSupportedException">Thrown when <see cref="IsAvx2"/> is false.</exception>
+        [Conditional("DEBUG")]
+        internal static void GuardAvx2(string operation)
+        {
+            if (IsAvx2)
+            {
+                return;
+            }
+
+            throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!");
+        }
+
        /// <summary>
        /// Transform all scalars in 'v' in a way that converting them to <see cref="int"/> would have rounding semantics.
        /// </summary>
@ -41,5 +57,117 @@ namespace SixLabors.ImageSharp
            Vector<float> sub0 = Vector.Subtract(add0, or0);
            return sub0;
        }
+
+        /// <summary>
+        /// Converts 'source.Length' <see cref="float"/> values normalized into [0..1] from 'source' into the 'dest' buffer of <see cref="byte"/> values.
+        /// The values are scaled up into [0..255] and rounded.
+        /// Based on:
+        /// <see href="http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions"/>
+        /// </summary>
+        /// <remarks>
+        /// The input is not clamped: only the lowest 8 bits of each resulting float bit pattern are stored,
+        /// so values outside of [0..1] do not saturate.
+        /// </remarks>
+        /// <param name="source">The normalized input values. Its length should be divisible by <see cref="Vector{T}.Count"/> of <see cref="float"/>.</param>
+        /// <param name="dest">The destination buffer. It should hold at least 'source.Length' bytes.</param>
+        internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan<float> source, Span<byte> dest)
+        {
+            GuardAvx2(nameof(BulkConvertNormalizedFloatToByte));
+
+            DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector<float>.Count!");
+            DebugGuard.IsTrue(dest.Length >= source.Length, nameof(dest), "dest.Length should be at least source.Length!");
+
+            if (source.Length == 0)
+            {
+                return;
+            }
+
+            ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref source.DangerousGetPinnableReference());
+            ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref dest.DangerousGetPinnableReference());
+
+            // 32768 = 2^15: at this magnitude adjacent floats are 1/256 apart, so after adding
+            // x * (255 / 256) the lowest 8 mantissa bits hold round(x * 255).
+            Vector<float> magick = new Vector<float>(32768.0f);
+            Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
+
+            // Every iteration consumes one whole Vector<float> and produces one Octet.OfByte,
+            // so the loop has to count vectors, not individual floats.
+            int n = source.Length / Vector<float>.Count;
+
+            for (int i = 0; i < n; i++)
+            {
+                // union { float f; uint32_t i; } u;
+                // u.f = 32768.0f + x * (255.0f / 256.0f);
+                // return (uint8_t)u.i;
+                Vector<float> x = Unsafe.Add(ref srcBase, i);
+                x = (x * scale) + magick;
+
+                // Reinterpret the float bits as integers (no numeric conversion).
+                Vector<uint> u = Vector.AsVectorUInt32(x);
+
+                Octet.OfUInt32 ii = Unsafe.As<Vector<uint>, Octet.OfUInt32>(ref u);
+
+                // Keep the low byte of every lane.
+                ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
+                d.LoadFrom(ref ii);
+            }
+        }
+
+        /// <summary>
+        /// Same as <see cref="BulkConvertNormalizedFloatToByte"/> but clamps the input values into [0..1] before conversion,
+        /// so out-of-range values saturate to 0 or 255.
+        /// </summary>
+        /// <param name="source">The input values. Its length should be divisible by <see cref="Vector{T}.Count"/> of <see cref="float"/>.</param>
+        /// <param name="dest">The destination buffer. It should hold at least 'source.Length' bytes.</param>
+        internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
+        {
+            GuardAvx2(nameof(BulkConvertNormalizedFloatToByteClampOverflows));
+
+            DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector<float>.Count!");
+            DebugGuard.IsTrue(dest.Length >= source.Length, nameof(dest), "dest.Length should be at least source.Length!");
+
+            if (source.Length == 0)
+            {
+                return;
+            }
+
+            ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref source.DangerousGetPinnableReference());
+            ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref dest.DangerousGetPinnableReference());
+
+            // 32768 = 2^15: at this magnitude adjacent floats are 1/256 apart, so after adding
+            // x * (255 / 256) the lowest 8 mantissa bits hold round(x * 255).
+            Vector<float> magick = new Vector<float>(32768.0f);
+            Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
+
+            // Every iteration consumes one whole Vector<float> and produces one Octet.OfByte,
+            // so the loop has to count vectors, not individual floats.
+            int n = source.Length / Vector<float>.Count;
+
+            for (int i = 0; i < n; i++)
+            {
+                // union { float f; uint32_t i; } u;
+                // u.f = 32768.0f + x * (255.0f / 256.0f);
+                // return (uint8_t)u.i;
+                Vector<float> x = Unsafe.Add(ref srcBase, i);
+
+                // Clamp into [0..1] so the magic-number trick cannot wrap around.
+                x = Vector.Max(x, Vector<float>.Zero);
+                x = Vector.Min(x, Vector<float>.One);
+
+                x = (x * scale) + magick;
+
+                // Reinterpret the float bits as integers (no numeric conversion).
+                Vector<uint> u = Vector.AsVectorUInt32(x);
+
+                Octet.OfUInt32 ii = Unsafe.As<Vector<uint>, Octet.OfUInt32>(ref u);
+
+                // Keep the low byte of every lane.
+                ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
+                d.LoadFrom(ref ii);
+            }
+        }
+
+#pragma warning disable SA1132 // Do not combine fields
+        /// <summary>
+        /// Helper structs holding 8 consecutive values each. They are used by the bulk conversion methods
+        /// to reinterpret vector and buffer memory through <see cref="Unsafe"/>, so the field order must not be changed.
+        /// </summary>
+        private static class Octet
+        {
+            /// <summary>
+            /// Eight consecutive <see cref="uint"/> values.
+            /// </summary>
+            public struct OfUInt32
+            {
+                public uint V0, V1, V2, V3, V4, V5, V6, V7;
+            }
+
+            /// <summary>
+            /// Eight consecutive <see cref="byte"/> values.
+            /// </summary>
+            public struct OfByte
+            {
+                public byte V0, V1, V2, V3, V4, V5, V6, V7;
+
+                /// <summary>
+                /// Fills this instance from 'i', casting each <see cref="uint"/> field to <see cref="byte"/>
+                /// (only the lowest 8 bits of every value are kept).
+                /// </summary>
+                /// <param name="i">The source values.</param>
+                public void LoadFrom(ref OfUInt32 i)
+                {
+                    this.V0 = (byte)i.V0;
+                    this.V1 = (byte)i.V1;
+                    this.V2 = (byte)i.V2;
+                    this.V3 = (byte)i.V3;
+                    this.V4 = (byte)i.V4;
+                    this.V5 = (byte)i.V5;
+                    this.V6 = (byte)i.V6;
+                    this.V7 = (byte)i.V7;
+                }
+            }
+        }
    }
 }
--- a/src/ImageSharp/Memory/BufferArea{T}.cs
+++ b/src/ImageSharp/Memory/BufferArea{T}.cs
@ -17,17 +17,19 @@ namespace SixLabors.ImageSharp.Memory
        /// </summary>
        public readonly Rectangle Rectangle;

+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public BufferArea(IBuffer2D<T> destinationBuffer, Rectangle rectangle)
        {
-            Guard.MustBeGreaterThanOrEqualTo(rectangle.X, 0, nameof(rectangle));
-            Guard.MustBeGreaterThanOrEqualTo(rectangle.Y, 0, nameof(rectangle));
-            Guard.MustBeLessThanOrEqualTo(rectangle.Width, destinationBuffer.Width, nameof(rectangle));
-            Guard.MustBeLessThanOrEqualTo(rectangle.Height, destinationBuffer.Height, nameof(rectangle));
+            DebugGuard.MustBeGreaterThanOrEqualTo(rectangle.X, 0, nameof(rectangle));
+            DebugGuard.MustBeGreaterThanOrEqualTo(rectangle.Y, 0, nameof(rectangle));
+            DebugGuard.MustBeLessThanOrEqualTo(rectangle.Width, destinationBuffer.Width, nameof(rectangle));
+            DebugGuard.MustBeLessThanOrEqualTo(rectangle.Height, destinationBuffer.Height, nameof(rectangle));

            this.DestinationBuffer = destinationBuffer;
            this.Rectangle = rectangle;
        }

+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public BufferArea(IBuffer2D<T> destinationBuffer)
            : this(destinationBuffer, destinationBuffer.FullRectangle())
        {
--- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
+++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
@ -5,7 +5,11 @@ using Xunit;

 namespace SixLabors.ImageSharp.Tests.Common
 {
+    using System.Linq;
+    using System.Runtime.CompilerServices;
+
    using Xunit.Abstractions;
+    using Xunit.Sdk;

    public class SimdUtilsTests
    {
@ -64,14 +68,13 @@ namespace SixLabors.ImageSharp.Tests.Common
            return new Vector<float>(data);
        }

-        private static Vector<float> CreateRandomTestVector(int seed, float scale)
+        private static Vector<float> CreateRandomTestVector(int seed, float min, float max)
        {
            float[] data = new float[Vector<float>.Count];
            Random rnd = new Random();
            for (int i = 0; i < Vector<float>.Count; i++)
            {
-                float v = (float)rnd.NextDouble() - 0.5f;
-                v *= 2 * scale;
+                float v = (float)rnd.NextDouble() * (max-min) + min;
                data[i] = v;
            }
            return new Vector<float>(data);
@ -97,7 +100,7 @@ namespace SixLabors.ImageSharp.Tests.Common
        [InlineData(42, 1000f)]
        public void FastRound_RandomValues(int seed, float scale)
        {
-            Vector<float> v = CreateRandomTestVector(seed, scale);
+            Vector<float> v = CreateRandomTestVector(seed, -scale*0.5f, scale*0.5f);
            Vector<float> r = v.FastRound();

            this.Output.WriteLine(v.ToString());
@ -106,6 +109,86 @@ namespace SixLabors.ImageSharp.Tests.Common
            AssertEvenRoundIsCorrect(r, v);
        }

+        // Whole numbers in [0..255], normalized by 255, must convert back to exactly the same byte values.
+        [Theory]
+        [InlineData(1, 0)]
+        [InlineData(1, 8)]
+        [InlineData(2, 16)]
+        [InlineData(3, 128)]
+        public void BulkConvertNormalizedFloatToByte_WithRoundedData(int seed, int count)
+        {
+            Random rnd = new Random(seed);
+            float[] wholeNumbers = rnd.GenerateRandomRoundedFloatArray(count, 0, 256);
+            float[] input = Array.ConvertAll(wholeNumbers, f => f / 255f);
+            byte[] actual = new byte[count];
+
+            SimdUtils.BulkConvertNormalizedFloatToByte(input, actual);
+
+            byte[] expected = Array.ConvertAll(wholeNumbers, f => (byte)f);
+            Assert.Equal(expected, actual);
+        }
+
+        // Arbitrary values in [0..1) must convert to the rounded value of 'f * 255'.
+        [Theory]
+        [InlineData(1, 0)]
+        [InlineData(1, 8)]
+        [InlineData(2, 16)]
+        [InlineData(3, 128)]
+        public void BulkConvertNormalizedFloatToByte_WithNonRoundedData(int seed, int count)
+        {
+            Random rnd = new Random(seed);
+            float[] input = rnd.GenerateRandomFloatArray(count, 0, 1f);
+            byte[] actual = new byte[count];
+
+            SimdUtils.BulkConvertNormalizedFloatToByte(input, actual);
+
+            byte[] expected = Array.ConvertAll(input, f => (byte)Math.Round(f * 255f));
+            Assert.Equal(expected, actual);
+        }
+
+        // Clamps 'x' into the [0..255] range.
+        private static float Clamp255(float x)
+        {
+            float lowerBounded = MathF.Max(0f, x);
+            return MathF.Min(255f, lowerBounded);
+        }
+
+        // Whole numbers in [-50..443], normalized by 255, must convert to their value clamped into [0..255].
+        [Theory]
+        [InlineData(1, 0)]
+        [InlineData(1, 8)]
+        [InlineData(2, 16)]
+        [InlineData(3, 128)]
+        public void BulkConvertNormalizedFloatToByteClampOverflows(int seed, int count)
+        {
+            Random rnd = new Random(seed);
+            float[] wholeNumbers = rnd.GenerateRandomRoundedFloatArray(count, -50, 444);
+            float[] input = Array.ConvertAll(wholeNumbers, f => f / 255f);
+            byte[] actual = new byte[count];
+
+            SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(input, actual);
+
+            byte[] expected = Array.ConvertAll(wholeNumbers, f => (byte)Clamp255(f));
+            Assert.Equal(expected, actual);
+        }
+
+        // Checks the scalar form of the magic-number conversion against a plain cast to byte.
+        [Theory]
+        [InlineData(0)]
+        [InlineData(7)]
+        [InlineData(42)]
+        [InlineData(255)]
+        [InlineData(256)]
+        [InlineData(257)]
+        private void MagicConvertToByte(float value)
+        {
+            byte expected = (byte)value;
+
+            float scaled = value / 256f;
+            byte actual = MagicConvert(scaled);
+
+            Assert.Equal(expected, actual);
+        }
+
+        // Adds 32768 to 'x', reinterprets the float bits as a uint and returns the lowest byte.
+        private static byte MagicConvert(float x)
+        {
+            float shifted = x + 32768.0f;
+            uint bits = Unsafe.As<float, uint>(ref shifted);
+            return (byte)bits;
+        }
+
        private static void AssertEvenRoundIsCorrect(Vector<float> r, Vector<float> v)
        {
            for (int i = 0; i < Vector<float>.Count; i++)
--- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs
@ -228,6 +228,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
            int componentCount,
            int inputBufferLength,
            int seed,
+            float minVal = 0f,
            float maxVal = 255f)
        {
            var rnd = new Random(seed);
@ -238,7 +239,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg

                for (int j = 0; j < inputBufferLength; j++)
                {
-                    values[j] = (float)rnd.NextDouble() * maxVal;
+                    values[j] = (float)rnd.NextDouble() * (maxVal-minVal)+minVal;
                }

                // no need to dispose when buffer is not array owner
--- a/tests/ImageSharp.Tests/Formats/Jpg/JpegProfilingBenchmarks.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegProfilingBenchmarks.cs
@ -1,6 +1,7 @@
 // Copyright (c) Six Labors and contributors.
 // Licensed under the Apache License, Version 2.0.

+// ReSharper disable InconsistentNaming
 namespace SixLabors.ImageSharp.Tests.Formats.Jpg
 {
    using System;
@ -8,7 +9,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
    using System.Linq;
    using System.Numerics;

+    using SixLabors.ImageSharp.Formats;
    using SixLabors.ImageSharp.Formats.Jpeg;
+    using SixLabors.ImageSharp.Formats.Jpeg.GolangPort;
+    using SixLabors.ImageSharp.Formats.Jpeg.PdfJsPort;

    using Xunit;
    using Xunit.Abstractions;
@ -30,9 +34,21 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
            TestImages.Jpeg.Baseline.Jpeg444,
        };

-        //[Theory] // Benchmark, enable manually
-        //[MemberData(nameof(DecodeJpegData))]
-        public void DecodeJpeg(string fileName)
+        // Benchmark of the original (Golang port) decoder. It is skipped in regular test runs because
+        // it decodes every image many times; remove the Skip argument to run it manually.
+        [Theory(Skip = "Benchmark, enable manually!")]
+        [MemberData(nameof(DecodeJpegData))]
+        public void DecodeJpeg_Original(string fileName)
+        {
+            this.DecodeJpegBenchmarkImpl(fileName, new OrigJpegDecoder());
+        }
+
+        // Benchmark of the PdfJs port decoder. It is skipped in regular test runs because
+        // it decodes every image many times; remove the Skip argument to run it manually.
+        [Theory(Skip = "Benchmark, enable manually!")]
+        [MemberData(nameof(DecodeJpegData))]
+        public void DecodeJpeg_PdfJs(string fileName)
+        {
+            this.DecodeJpegBenchmarkImpl(fileName, new PdfJsJpegDecoder());
+        }
+
+        private void DecodeJpegBenchmarkImpl(string fileName, IImageDecoder decoder)
        {
            const int ExecutionCount = 30;

@ -48,11 +64,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
                ExecutionCount,
                () =>
                    {
-                         Image<Rgba32> img = Image.Load<Rgba32>(bytes);
+                        Image<Rgba32> img = Image.Load<Rgba32>(bytes, decoder);
                    },
                // ReSharper disable once ExplicitCallerInfoArgument
                $"Decode {fileName}");
-
        }

        // Benchmark, enable manually!
--- a/tests/ImageSharp.Tests/TestUtilities/TestDataGenerator.cs
+++ b/tests/ImageSharp.Tests/TestUtilities/TestDataGenerator.cs
@ -0,0 +1,32 @@
+using System;
+
+namespace SixLabors.ImageSharp.Tests
+{
+    /// <summary>
+    /// Extension methods on <see cref="Random"/> producing arrays of test input values.
+    /// </summary>
+    internal static class TestDataGenerator
+    {
+        /// <summary>
+        /// Creates an array of 'length' random values, each computed as 'NextDouble() * (maxVal - minVal) + minVal'.
+        /// </summary>
+        /// <param name="rnd">The random number source.</param>
+        /// <param name="length">The number of values to generate.</param>
+        /// <param name="minVal">The lower bound of the generated values.</param>
+        /// <param name="maxVal">The upper bound of the generated values.</param>
+        /// <returns>The generated array.</returns>
+        public static float[] GenerateRandomFloatArray(this Random rnd, int length, float minVal, float maxVal)
+        {
+            float[] result = new float[length];
+
+            for (int index = 0; index < result.Length; index++)
+            {
+                float unit = (float)rnd.NextDouble();
+                result[index] = unit * (maxVal - minVal) + minVal;
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Creates an array of 'length' random whole numbers stored as <see cref="float"/>,
+        /// each taken from <see cref="Random.Next(int, int)"/>.
+        /// </summary>
+        /// <param name="rnd">The random number source.</param>
+        /// <param name="length">The number of values to generate.</param>
+        /// <param name="minVal">The inclusive lower bound.</param>
+        /// <param name="maxValExclusive">The exclusive upper bound.</param>
+        /// <returns>The generated array.</returns>
+        public static float[] GenerateRandomRoundedFloatArray(this Random rnd, int length, int minVal, int maxValExclusive)
+        {
+            float[] result = new float[length];
+
+            for (int index = 0; index < result.Length; index++)
+            {
+                result[index] = rnd.Next(minVal, maxValExclusive);
+            }
+
+            return result;
+        }
+    }
+}