Merge branch 'master' into bp/quantizeblocksse

4 years ago · 1864ca4bcd
4 changed files with 156 additions and 45 deletions
--- a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
@@ -1,6 +1,8 @@
 // Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

+using System.Runtime.CompilerServices;
+
 namespace SixLabors.ImageSharp.Formats.Webp.Lossless
 {
    /// <summary>
@@ -41,6 +43,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
        /// Inserts a new color into the cache.
        /// </summary>
        /// <param name="bgra">The color to insert.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
        public void Insert(uint bgra)
        {
            int key = HashPix(bgra, this.HashShift);
@@ -52,6 +55,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
        /// </summary>
        /// <param name="key">The key to lookup.</param>
        /// <returns>The color for the key.</returns>
+        [MethodImpl(InliningOptions.ShortMethod)]
        public uint Lookup(int key) => this.Colors[key];

        /// <summary>
@@ -59,6 +63,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
        /// </summary>
        /// <param name="bgra">The color to check.</param>
        /// <returns>The index of the color in the cache or -1 if it's not present.</returns>
+        [MethodImpl(InliningOptions.ShortMethod)]
        public int Contains(uint bgra)
        {
            int key = HashPix(bgra, this.HashShift);
@@ -70,6 +75,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
        /// </summary>
        /// <param name="bgra">The color.</param>
        /// <returns>The index for the color.</returns>
+        [MethodImpl(InliningOptions.ShortMethod)]
        public int GetIndex(uint bgra) => HashPix(bgra, this.HashShift);

        /// <summary>
@@ -77,8 +83,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
        /// </summary>
        /// <param name="key">The key.</param>
        /// <param name="bgra">The color to add.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
        public void Set(uint key, uint bgra) => this.Colors[key] = bgra;

+        [MethodImpl(InliningOptions.ShortMethod)]
        public static int HashPix(uint argb, int shift) => (int)((argb * HashMul) >> shift);
    }
 }
--- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
@@ -765,6 +765,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
        /// <summary>
        /// Fast calculation of log2(v) for integer input.
        /// </summary>
+        [MethodImpl(InliningOptions.ShortMethod)]
        public static float FastLog2(uint v) => v < LogLookupIdxMax ? WebpLookupTables.Log2Table[v] : FastLog2Slow(v);

        /// <summary>
@@ -793,7 +794,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless

        private static float FastSLog2Slow(uint v)
        {
-            Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
+            DebugGuard.MustBeGreaterThanOrEqualTo<uint>(v, LogLookupIdxMax, nameof(v));
            if (v < ApproxLogWithCorrectionMax)
            {
                int logCnt = 0;
@@ -1214,30 +1215,65 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless

        private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2)
        {
-            int a = AddSubtractComponentFull(
-                (int)(c0 >> 24),
-                (int)(c1 >> 24),
-                (int)(c2 >> 24));
-            int r = AddSubtractComponentFull(
-                (int)((c0 >> 16) & 0xff),
-                (int)((c1 >> 16) & 0xff),
-                (int)((c2 >> 16) & 0xff));
-            int g = AddSubtractComponentFull(
-                (int)((c0 >> 8) & 0xff),
-                (int)((c1 >> 8) & 0xff),
-                (int)((c2 >> 8) & 0xff));
-            int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
-            return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Sse2.IsSupported)
+            {
+                Vector128<byte> c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128<byte>.Zero);
+                Vector128<byte> c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128<byte>.Zero);
+                Vector128<byte> c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128<byte>.Zero);
+                Vector128<short> v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16());
+                Vector128<short> v2 = Sse2.Subtract(v1, c2Vec.AsInt16());
+                Vector128<byte> b = Sse2.PackUnsignedSaturate(v2, v2);
+                uint output = Sse2.ConvertToUInt32(b.AsUInt32());
+                return output;
+            }
+#endif
+            {
+                int a = AddSubtractComponentFull(
+                    (int)(c0 >> 24),
+                    (int)(c1 >> 24),
+                    (int)(c2 >> 24));
+                int r = AddSubtractComponentFull(
+                    (int)((c0 >> 16) & 0xff),
+                    (int)((c1 >> 16) & 0xff),
+                    (int)((c2 >> 16) & 0xff));
+                int g = AddSubtractComponentFull(
+                    (int)((c0 >> 8) & 0xff),
+                    (int)((c1 >> 8) & 0xff),
+                    (int)((c2 >> 8) & 0xff));
+                int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
+                return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+            }
        }

        private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2)
        {
-            uint ave = Average2(c0, c1);
-            int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
-            int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
-            int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
-            int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
-            return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Sse2.IsSupported)
+            {
+                Vector128<byte> c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128<byte>.Zero);
+                Vector128<byte> c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128<byte>.Zero);
+                Vector128<byte> b0 = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128<byte>.Zero);
+                Vector128<short> avg = Sse2.Add(c1Vec.AsInt16(), c0Vec.AsInt16());
+                Vector128<short> a0 = Sse2.ShiftRightLogical(avg, 1);
+                Vector128<short> a1 = Sse2.Subtract(a0, b0.AsInt16());
+                Vector128<short> bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16());
+                Vector128<short> a2 = Sse2.Subtract(a1, bgta);
+                Vector128<short> a3 = Sse2.ShiftRightArithmetic(a2, 1);
+                Vector128<short> a4 = Sse2.Add(a0, a3).AsInt16();
+                Vector128<byte> a5 = Sse2.PackUnsignedSaturate(a4, a4);
+                uint output = Sse2.ConvertToUInt32(a5.AsUInt32());
+                return output;
+            }
+#endif
+            {
+                uint ave = Average2(c0, c1);
+                int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
+                int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
+                int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
+                int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
+                return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+            }
        }

        [MethodImpl(InliningOptions.ShortMethod)]
@@ -1275,11 +1311,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
                    Vector128<byte> pb = Sse2.UnpackLow(bc, Vector128<byte>.Zero); // |b - c|
                    Vector128<ushort> diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16());
                    Sse2.Store((ushort*)p, diff);
+                    int paMinusPb = output[3] + output[2] + output[1] + output[0];
+                    return (paMinusPb <= 0) ? a : b;
                }
-
-                int paMinusPb = output[0] + output[1] + output[2] + output[3];
-
-                return (paMinusPb <= 0) ? a : b;
            }
            else
 #endif
--- a/src/ImageSharp/Image.Decode.cs
+++ b/src/ImageSharp/Image.Decode.cs
@@ -58,31 +58,42 @@ namespace SixLabors.ImageSharp
                return null;
            }

-            using (IMemoryOwner<byte> buffer = config.MemoryAllocator.Allocate<byte>(headerSize, AllocationOptions.Clean))
+            // Header sizes are so small that headersBuffer will always be stackalloc-ed in practice
+            // and heap allocation will never happen, so there is no need for the usual try-finally ArrayPool dance.
+            // The array case is only a safety mechanism following stackalloc best practices.
+            Span<byte> headersBuffer = headerSize > 512 ? new byte[headerSize] : stackalloc byte[headerSize];
+            long startPosition = stream.Position;
+
+            // Read doesn't always guarantee the full requested length, so keep reading
+            // until we either get our count or hit the end of the stream.
+            int n = 0;
+            int i;
+            do
            {
-                Span<byte> bufferSpan = buffer.GetSpan();
-                long startPosition = stream.Position;
+                i = stream.Read(headersBuffer, n, headerSize - n);
+                n += i;
+            }
+            while (n < headerSize && i > 0);

-                // Read doesn't always guarantee the full returned length so read a byte
-                // at a time until we get either our count or hit the end of the stream.
-                int n = 0;
-                int i;
-                do
+            stream.Position = startPosition;
+
+            // Does the given stream contain enough data to fit in the header for the format
+            // and does that data match the format specification?
+            // Individual formats should still check since they are public.
+            IImageFormat format = null;
+            foreach (IImageFormatDetector formatDetector in config.ImageFormatsManager.FormatDetectors)
+            {
+                if (formatDetector.HeaderSize <= headerSize)
                {
-                    i = stream.Read(bufferSpan, n, headerSize - n);
-                    n += i;
+                    IImageFormat attemptFormat = formatDetector.DetectFormat(headersBuffer);
+                    if (attemptFormat != null)
+                    {
+                        format = attemptFormat;
+                    }
                }
-                while (n < headerSize && i > 0);
-
-                stream.Position = startPosition;
-
-                // Does the given stream contain enough data to fit in the header for the format
-                // and does that data match the format specification?
-                // Individual formats should still check since they are public.
-                return config.ImageFormatsManager.FormatDetectors
-                    .Where(x => x.HeaderSize <= headerSize)
-                    .Select(x => x.DetectFormat(buffer.GetSpan())).LastOrDefault(x => x != null);
            }
+
+            return format;
        }

        /// <summary>
--- a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
+++ b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
@@ -153,9 +153,55 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
            }
        }

+        private static void RunPredictor12Test()
+        {
+            // arrange
+            uint[] topData = { 4294844413, 4294779388 };
+            uint left = 4294844413;
+            uint expectedResult = 4294779388;
+
+            // act
+            unsafe
+            {
+                fixed (uint* top = &topData[1])
+                {
+                    uint actual = LosslessUtils.Predictor12(left, top);
+
+                    // assert
+                    Assert.Equal(expectedResult, actual);
+                }
+            }
+        }
+
+        private static void RunPredictor13Test()
+        {
+            // arrange
+            uint[] topData = { 4278193922, 4278193666 };
+            uint left = 4278193410;
+            uint expectedResult = 4278193154;
+
+            // act
+            unsafe
+            {
+                fixed (uint* top = &topData[1])
+                {
+                    uint actual = LosslessUtils.Predictor13(left, top);
+
+                    // assert
+                    Assert.Equal(expectedResult, actual);
+                }
+            }
+        }
+
        [Fact]
        public void Predictor11_Works() => RunPredictor11Test();

+        [Fact]
+        public void Predictor12_Works() => RunPredictor12Test();
+
+        [Fact]
+        public void Predictor13_Works() => RunPredictor13Test();
+
        [Fact]
        public void SubtractGreen_Works() => RunSubtractGreenTest();

@@ -175,6 +221,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
        [Fact]
        public void Predictor11_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.DisableSSE2);

+        [Fact]
+        public void Predictor12_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.AllowAll);
+
+        [Fact]
+        public void Predictor12_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.DisableSSE2);
+
+        [Fact]
+        public void Predictor13_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.AllowAll);
+
+        [Fact]
+        public void Predictor13_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.DisableSSE2);
+
        [Fact]
        public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll);