Merge branch 'master' into webp

5 years ago · 91bf3830c4
7 changed files with 396 additions and 34 deletions
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs
@ -1,16 +1,17 @@
 // Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.

+using System;
 using System.Runtime.CompilerServices;
+using SixLabors.ImageSharp.PixelFormats;

 namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
 {
    /// <summary>
    /// Provides 8-bit lookup tables for converting from Rgb to YCbCr colorspace.
    /// Methods to build the tables are based on libjpeg implementation.
-    /// TODO: Replace this logic with SIMD conversion (similar to the one in the decoder)!
    /// </summary>
-    internal unsafe struct RgbToYCbCrTables
+    internal unsafe struct RgbToYCbCrConverterLut
    {
        /// <summary>
        /// The red luminance table
@ -63,10 +64,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
        /// <summary>
        /// Initializes the YCbCr tables
        /// </summary>
-        /// <returns>The initialized <see cref="RgbToYCbCrTables"/></returns>
-        public static RgbToYCbCrTables Create()
+        /// <returns>The initialized <see cref="RgbToYCbCrConverterLut"/></returns>
+        public static RgbToYCbCrConverterLut Create()
        {
-            RgbToYCbCrTables tables = default;
+            RgbToYCbCrConverterLut tables = default;

            for (int i = 0; i <= 255; i++)
            {
@ -92,11 +93,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
        }

        /// <summary>
-        /// TODO: Replace this logic with SIMD conversion (similar to the one in the decoder)!
        /// Optimized method to allocates the correct y, cb, and cr values to the DCT blocks from the given r, g, b values.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public void ConvertPixelInto(
+        private void ConvertPixelInto(
            int r,
            int g,
            int b,
@ -111,10 +111,29 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
            // float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b));
            cbResult[i] = (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits;

-            // float cr = MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero);
+            // float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b));
            crResult[i] = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits;
        }

+        /// <summary>
+        /// Converts 64 <see cref="Rgb24"/> pixels into the Y, Cb and Cr blocks by passing each pixel
+        /// through the lookup tables.
+        /// </summary>
+        /// <param name="rgbSpan">The source pixels; the first 64 elements are read.</param>
+        /// <param name="yBlock">The block receiving the Y values.</param>
+        /// <param name="cbBlock">The block receiving the Cb values.</param>
+        /// <param name="crBlock">The block receiving the Cr values.</param>
+        public void Convert(Span<Rgb24> rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
+        {
+            ref Rgb24 first = ref rgbSpan[0];
+            int index = 0;
+
+            while (index < 64)
+            {
+                ref Rgb24 pixel = ref Unsafe.Add(ref first, index);
+                this.ConvertPixelInto(pixel.R, pixel.G, pixel.B, ref yBlock, ref cbBlock, ref crBlock, index);
+                index++;
+            }
+        }
+
        /// <summary>
        /// Converts a floating-point coefficient to a fixed-point integer: the value is multiplied by
        /// 2^ScaleBits, 0.5 is added and the result is truncated to <see cref="int"/>.
        /// </summary>
        /// <param name="x">The coefficient to convert.</param>
        /// <returns>The scaled integer representation of <paramref name="x"/>.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static int Fix(float x)
            => (int)((x * (1L << ScaleBits)) + 0.5F);
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
@ -0,0 +1,120 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Diagnostics;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
+{
+    /// <summary>
+    /// Converts a block of 64 <see cref="Rgb24"/> pixels to YCbCr using AVX2 hardware intrinsics.
+    /// </summary>
+    internal static class RgbToYCbCrConverterVectorized
+    {
+        /// <summary>
+        /// Gets a value indicating whether the vectorized converter can be used:
+        /// <see langword="true"/> only when compiled with SUPPORTS_RUNTIME_INTRINSICS and AVX2 is available.
+        /// </summary>
+        public static bool IsSupported
+        {
+            get
+            {
+#if SUPPORTS_RUNTIME_INTRINSICS
+                return Avx2.IsSupported;
+#else
+                return false;
+#endif
+            }
+        }
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+        // Indices for Avx2.PermuteVar8x32, loaded as eight 32-bit values (0, 1, 2, 6 | 3, 4, 5, 7).
+        // Moves the first 12 bytes of a 32-byte load into the lower 128-bit lane and the following
+        // 12 bytes into the upper lane.
+        private static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[]
+        {
+            0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0,
+            3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0
+        };
+
+        // Indices for Avx2.PermuteVar8x32, loaded as eight 32-bit values (2, 3, 4, 0 | 5, 6, 7, 1).
+        // Same split as above, but for the last 24 bytes of a 32-byte load.
+        private static ReadOnlySpan<byte> MoveLast24BytesToSeparateLanes => new byte[]
+        {
+            2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0,
+            5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0
+        };
+
+        // Byte shuffle mask applied per 128-bit lane by Avx2.Shuffle: gathers bytes 0, 3, 6, 9, then
+        // 1, 4, 7, 10, then 2, 5, 8, 11 of the lane; the 0xFF entries yield zero bytes.
+        // Convert treats the three groups as the R, G and B values of four pixels.
+        private static ReadOnlySpan<byte> ExtractRgb => new byte[]
+        {
+            0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF,
+            0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF
+        };
+#endif
+
+        /// <summary>
+        /// Converts 64 RGB pixels into the Y, Cb and Cr blocks, eight pixels per step.
+        /// </summary>
+        /// <param name="rgbSpan">
+        /// The source pixels. No length check is performed; 192 bytes are read from the start of the span
+        /// (64 pixels, assuming <see cref="Rgb24"/> occupies 3 bytes - TODO confirm).
+        /// </param>
+        /// <param name="yBlock">The block receiving the Y values.</param>
+        /// <param name="cbBlock">The block receiving the Cb values.</param>
+        /// <param name="crBlock">The block receiving the Cr values.</param>
+        /// <remarks>
+        /// When SUPPORTS_RUNTIME_INTRINSICS is not defined, the method body consists of the debug assertion only.
+        /// </remarks>
+        public static void Convert(ReadOnlySpan<Rgb24> rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
+        {
+            Debug.Assert(IsSupported, "AVX2 is required to run this converter");
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+            var f0299 = Vector256.Create(0.299f);
+            var f0587 = Vector256.Create(0.587f);
+            var f0114 = Vector256.Create(0.114f);
+            var fn0168736 = Vector256.Create(-0.168736f);
+            var fn0331264 = Vector256.Create(-0.331264f);
+            var f128 = Vector256.Create(128f);
+            var fn0418688 = Vector256.Create(-0.418688f);
+            var fn0081312F = Vector256.Create(-0.081312F);
+            var f05 = Vector256.Create(0.5f);
+            var zero = Vector256.Create(0).AsByte();
+
+            ref Vector256<byte> inRef = ref Unsafe.As<Rgb24, Vector256<byte>>(ref MemoryMarshal.GetReference(rgbSpan));
+            ref Vector256<float> destYRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref yBlock);
+            ref Vector256<float> destCbRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref cbBlock);
+            ref Vector256<float> destCrRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref crBlock);
+
+            var extractToLanesMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes));
+            var extractRgbMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ExtractRgb));
+            Vector256<byte> rgb, rg, bx;
+            Vector256<float> r, g, b;
+
+            // First seven steps: each loads 32 bytes at byte offset 24 * i and uses the first 24 of them.
+            for (int i = 0; i < 7; i++)
+            {
+                rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)(24 * i)).AsUInt32(), extractToLanesMask).AsByte();
+
+                rgb = Avx2.Shuffle(rgb, extractRgbMask);
+
+                // Widen the bytes to 32-bit integers by interleaving with zero twice, then convert to float.
+                rg = Avx2.UnpackLow(rgb, zero);
+                bx = Avx2.UnpackHigh(rgb, zero);
+
+                r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
+                g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
+                b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());
+
+                // (0.299F * r) + (0.587F * g) + (0.114F * b);
+                Unsafe.Add(ref destYRef, i) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r);
+
+                // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b))
+                Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r));
+
+                // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b))
+                Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r));
+            }
+
+            // Last step: load the 32 bytes at byte offset 160 and use the last 24 of them,
+            // so the final read ends at byte 192.
+            extractToLanesMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(MoveLast24BytesToSeparateLanes));
+            rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)160).AsUInt32(), extractToLanesMask).AsByte();
+            rgb = Avx2.Shuffle(rgb, extractRgbMask);
+
+            rg = Avx2.UnpackLow(rgb, zero);
+            bx = Avx2.UnpackHigh(rgb, zero);
+
+            r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
+            g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
+            b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());
+
+            // (0.299F * r) + (0.587F * g) + (0.114F * b);
+            Unsafe.Add(ref destYRef, 7) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r);
+
+            // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b))
+            Unsafe.Add(ref destCbRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r));
+
+            // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b))
+            Unsafe.Add(ref destCrRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r));
+#endif
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs
@ -2,7 +2,6 @@
 // Licensed under the Apache License, Version 2.0.

 using System;
-using System.Runtime.CompilerServices;
 using SixLabors.ImageSharp.Advanced;
 using SixLabors.ImageSharp.PixelFormats;

@ -33,7 +32,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
        /// <summary>
        /// The color conversion tables
        /// </summary>
-        private RgbToYCbCrTables colorTables;
+        private RgbToYCbCrConverterLut colorTables;

        /// <summary>
        /// Temporal 8x8 block to hold TPixel data
@ -48,7 +47,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
        public static YCbCrForwardConverter<TPixel> Create()
        {
            var result = default(YCbCrForwardConverter<TPixel>);
-            result.colorTables = RgbToYCbCrTables.Create();
+            if (!RgbToYCbCrConverterVectorized.IsSupported)
+            {
+                // Only build the lookup tables when the vectorized converter is unavailable;
+                // they are used solely by the non-vectorized conversion path.
+                result.colorTables = RgbToYCbCrConverterLut.Create();
+            }
+
            return result;
        }

@ -65,20 +69,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
            ref Block8x8F yBlock = ref this.Y;
            ref Block8x8F cbBlock = ref this.Cb;
            ref Block8x8F crBlock = ref this.Cr;
-            ref Rgb24 rgbStart = ref rgbSpan[0];

-            for (int i = 0; i < 64; i++)
+            if (RgbToYCbCrConverterVectorized.IsSupported)
            {
-                ref Rgb24 c = ref Unsafe.Add(ref rgbStart, i);
-
-                this.colorTables.ConvertPixelInto(
-                    c.R,
-                    c.G,
-                    c.B,
-                    ref yBlock,
-                    ref cbBlock,
-                    ref crBlock,
-                    i);
+                RgbToYCbCrConverterVectorized.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock);
+            }
+            else
+            {
+                this.colorTables.Convert(rgbSpan,  ref yBlock, ref cbBlock, ref crBlock);
            }
        }
    }
--- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs
+++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs
@ -4,6 +4,11 @@
 using System;
 using System.Numerics;
 using System.Runtime.CompilerServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif

 namespace SixLabors.ImageSharp.Processing.Processors.Transforms
 {
@ -66,21 +71,94 @@ namespace SixLabors.ImageSharp.Processing.Processors.Transforms
        [MethodImpl(InliningOptions.ShortMethod)]
        public Vector4 ConvolveCore(ref Vector4 rowStartRef)
        {
-            ref float horizontalValues = ref Unsafe.AsRef<float>(this.bufferPtr);
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // The vectorized path needs both FMA (Fma.MultiplyAdd) and AVX2 (Avx2.PermuteVar8x32).
+            // FMA support does not imply AVX2 support, so both instruction sets must be checked;
+            // otherwise the scalar fallback below is used.
+            if (Avx2.IsSupported && Fma.IsSupported)
+            {
+                float* bufferStart = this.bufferPtr;
+                float* bufferEnd = bufferStart + (this.Length & ~3);
+                Vector256<float> result256_0 = Vector256<float>.Zero;
+                Vector256<float> result256_1 = Vector256<float>.Zero;
+
+                // Permutation indices [0, 0, 0, 0, 1, 1, 1, 1]: repeats the first weight across the lower
+                // four floats and the second weight across the upper four floats.
+                ReadOnlySpan<byte> maskBytes = new byte[]
+                {
+                    0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0,
+                    1, 0, 0, 0, 1, 0, 0, 0,
+                    1, 0, 0, 0, 1, 0, 0, 0,
+                };
+                Vector256<int> mask = Unsafe.ReadUnaligned<Vector256<int>>(ref MemoryMarshal.GetReference(maskBytes));

-            // Destination color components
-            Vector4 result = Vector4.Zero;
+                while (bufferStart < bufferEnd)
+                {
+                    // It is important to use a single expression here so that the JIT will correctly use vfmadd231ps
+                    // for the FMA operation, and execute it directly on the target register and reading directly from
+                    // memory for the first parameter. This skips initializing a SIMD register, and an extra copy.
+                    // The code below should compile in the following assembly on .NET 5 x64:
+                    //
+                    // vmovsd xmm2, [rax]               ; load *(double*)bufferStart into xmm2 as [ab, _]
+                    // vpermps ymm2, ymm1, ymm2         ; permute as a float YMM register to [a, a, a, a, b, b, b, b]
+                    // vfmadd231ps ymm0, ymm2, [r8]     ; result256_0 = FMA(pixels, factors) + result256_0
+                    //
+                    // For tracking the codegen issue with FMA, see: https://github.com/dotnet/runtime/issues/12212.
+                    // Additionally, we're also unrolling two computations per each loop iterations to leverage the
+                    // fact that most CPUs have two ports to schedule multiply operations for FMA instructions.
+                    result256_0 = Fma.MultiplyAdd(
+                        Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef),
+                        Avx2.PermuteVar8x32(Vector256.CreateScalarUnsafe(*(double*)bufferStart).AsSingle(), mask),
+                        result256_0);

-            for (int i = 0; i < this.Length; i++)
-            {
-                float weight = Unsafe.Add(ref horizontalValues, i);
+                    result256_1 = Fma.MultiplyAdd(
+                        Unsafe.As<Vector4, Vector256<float>>(ref Unsafe.Add(ref rowStartRef, 2)),
+                        Avx2.PermuteVar8x32(Vector256.CreateScalarUnsafe(*(double*)(bufferStart + 2)).AsSingle(), mask),
+                        result256_1);
+
+                    bufferStart += 4;
+                    rowStartRef = ref Unsafe.Add(ref rowStartRef, 4);
+                }
+
+                result256_0 = Avx.Add(result256_0, result256_1);
+
+                // Handle a remaining pair of weights (two Vector4 values) with one more 256-bit FMA.
+                if ((this.Length & 3) >= 2)
+                {
+                    result256_0 = Fma.MultiplyAdd(
+                        Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef),
+                        Avx2.PermuteVar8x32(Vector256.CreateScalarUnsafe(*(double*)bufferStart).AsSingle(), mask),
+                        result256_0);
+
+                    bufferStart += 2;
+                    rowStartRef = ref Unsafe.Add(ref rowStartRef, 2);
+                }

-                // Vector4 v = offsetedRowSpan[i];
-                Vector4 v = Unsafe.Add(ref rowStartRef, i);
-                result += v * weight;
+                // Fold the two 128-bit halves into a single Vector4-sized accumulator.
+                Vector128<float> result128 = Sse.Add(result256_0.GetLower(), result256_0.GetUpper());
+
+                // Handle the final odd weight with a 128-bit FMA.
+                if ((this.Length & 1) != 0)
+                {
+                    result128 = Fma.MultiplyAdd(
+                        Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef),
+                        Vector128.Create(*bufferStart),
+                        result128);
+                }
+
+                return *(Vector4*)&result128;
            }
+            else
+#endif
+            {
+                // Destination color components
+                Vector4 result = Vector4.Zero;
+                float* bufferStart = this.bufferPtr;
+                float* bufferEnd = this.bufferPtr + this.Length;

-            return result;
+                while (bufferStart < bufferEnd)
+                {
+                    // Vector4 v = offsetedRowSpan[i];
+                    result += rowStartRef * *bufferStart;
+
+                    bufferStart++;
+                    rowStartRef = ref Unsafe.Add(ref rowStartRef, 1);
+                }
+
+                return result;
+            }
        }

        /// <summary>
--- a/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs
+++ b/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs
@ -0,0 +1,56 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using BenchmarkDotNet.Attributes;
+using SixLabors.ImageSharp.Formats.Jpeg.Components;
+using SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder;
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Benchmarks.Format.Jpeg.Components.Encoder
+{
+    /// <summary>
+    /// Benchmarks the lookup-table RGB to YCbCr converter against the vectorized converter
+    /// on a single block of 64 pixels.
+    /// </summary>
+    public class YCbCrForwardConverterBenchmark
+    {
+        private RgbToYCbCrConverterLut converter;
+        private Rgb24[] data;
+
+        /// <summary>
+        /// Builds the lookup tables and fills the 64 pixel input buffer with seeded pseudo-random colors.
+        /// </summary>
+        [GlobalSetup]
+        public void Setup()
+        {
+            this.converter = RgbToYCbCrConverterLut.Create();
+
+            var rng = new Random(42);
+            this.data = new Rgb24[64];
+
+            var channels = new byte[3];
+            int index = 0;
+            while (index < this.data.Length)
+            {
+                rng.NextBytes(channels);
+                this.data[index] = new Rgb24(channels[0], channels[1], channels[2]);
+                index++;
+            }
+        }
+
+        /// <summary>
+        /// Baseline: converts the block with the lookup-table converter.
+        /// </summary>
+        [Benchmark(Baseline = true)]
+        public void ConvertLut()
+        {
+            Block8x8F luma = default;
+            Block8x8F chromaBlue = default;
+            Block8x8F chromaRed = default;
+
+            this.converter.Convert(this.data.AsSpan(), ref luma, ref chromaBlue, ref chromaRed);
+        }
+
+        /// <summary>
+        /// Converts the block with the vectorized converter; does nothing when it is not supported.
+        /// </summary>
+        [Benchmark]
+        public void ConvertVectorized()
+        {
+            Block8x8F luma = default;
+            Block8x8F chromaBlue = default;
+            Block8x8F chromaRed = default;
+
+            if (!RgbToYCbCrConverterVectorized.IsSupported)
+            {
+                return;
+            }
+
+            RgbToYCbCrConverterVectorized.Convert(this.data.AsSpan(), ref luma, ref chromaBlue, ref chromaRed);
+        }
+    }
+}
--- a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs
@ -0,0 +1,91 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using SixLabors.ImageSharp.ColorSpaces;
+using SixLabors.ImageSharp.Formats.Jpeg.Components;
+using SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder;
+using SixLabors.ImageSharp.PixelFormats;
+using SixLabors.ImageSharp.Tests.Colorspaces.Conversion;
+using Xunit;
+using Xunit.Abstractions;
+
+// ReSharper disable InconsistentNaming
+namespace SixLabors.ImageSharp.Tests.Formats.Jpg
+{
+    /// <summary>
+    /// Verifies the lookup-table and vectorized RGB to YCbCr converters against a scalar
+    /// floating-point reference computation.
+    /// </summary>
+    public class RgbToYCbCrConverterTests
+    {
+        public RgbToYCbCrConverterTests(ITestOutputHelper output)
+        {
+            this.Output = output;
+        }
+
+        private ITestOutputHelper Output { get; }
+
+        /// <summary>
+        /// The lookup-table converter must match the reference within a tolerance of 1.
+        /// </summary>
+        [Fact]
+        public void TestLutConverter()
+        {
+            Rgb24[] data = CreateTestData();
+            var target = RgbToYCbCrConverterLut.Create();
+
+            Block8x8F y = default;
+            Block8x8F cb = default;
+            Block8x8F cr = default;
+
+            target.Convert(data.AsSpan(), ref y, ref cb, ref cr);
+
+            Verify(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(1F));
+        }
+
+        /// <summary>
+        /// The vectorized converter must match the reference within a tolerance of 0.0001.
+        /// The test is a no-op when the vectorized converter is not supported on the current machine.
+        /// </summary>
+        [Fact]
+        public void TestVectorizedConverter()
+        {
+            if (!RgbToYCbCrConverterVectorized.IsSupported)
+            {
+                // IsSupported reflects AVX2 availability, so the message names AVX2.
+                this.Output.WriteLine("No AVX2 present, skipping test!");
+                return;
+            }
+
+            Rgb24[] data = CreateTestData();
+
+            Block8x8F y = default;
+            Block8x8F cb = default;
+            Block8x8F cr = default;
+
+            RgbToYCbCrConverterVectorized.Convert(data.AsSpan(), ref y, ref cb, ref cr);
+
+            Verify(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(0.0001F));
+        }
+
+        /// <summary>
+        /// Recomputes Y, Cb and Cr for every input pixel with floating-point arithmetic and asserts
+        /// that the converted blocks agree with it according to <paramref name="comparer"/>.
+        /// </summary>
+        private static void Verify(ReadOnlySpan<Rgb24> data, ref Block8x8F yResult, ref Block8x8F cbResult, ref Block8x8F crResult, ApproximateColorSpaceComparer comparer)
+        {
+            for (int i = 0; i < data.Length; i++)
+            {
+                int r = data[i].R;
+                int g = data[i].G;
+                int b = data[i].B;
+
+                float y = (0.299F * r) + (0.587F * g) + (0.114F * b);
+                float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b));
+                float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b));
+
+                Assert.True(comparer.Equals(new YCbCr(y, cb, cr), new YCbCr(yResult[i], cbResult[i], crResult[i])), $"Pos {i}, Expected {y} == {yResult[i]}, {cb} == {cbResult[i]}, {cr} == {crResult[i]}");
+            }
+        }
+
+        /// <summary>
+        /// Creates 64 pseudo-random pixels from a fixed seed.
+        /// </summary>
+        private static Rgb24[] CreateTestData()
+        {
+            var data = new Rgb24[64];
+
+            // Fixed seed: a failing run must be reproducible with exactly the same pixels.
+            var r = new Random(42);
+
+            var random = new byte[3];
+            for (int i = 0; i < data.Length; i++)
+            {
+                r.NextBytes(random);
+                data[i] = new Rgb24(random[0], random[1], random[2]);
+            }
+
+            return data;
+        }
+    }
+}
--- a/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeTests.cs
+++ b/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeTests.cs
@ -139,7 +139,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Transforms
                        testOutputDetails: workingBufferLimitInRows,
                        appendPixelTypeToFileName: false);
                    image.CompareToReferenceOutput(
-                        ImageComparer.TolerantPercentage(0.001f),
+                        ImageComparer.TolerantPercentage(0.004f),
                        provider,
                        testOutputDetails: workingBufferLimitInRows,
                        appendPixelTypeToFileName: false);