From 2f4a0ae2aff9be36f8106a19e838235d4fcaf08b Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Tue, 12 Sep 2017 14:01:08 +0200 Subject: [PATCH] go home Vector, you are drunk --- .../Decoder/JpegColorConverter.FromYCbCr.cs | 108 ++++++++++++------ .../Formats/Jpg/JpegColorConverterTests.cs | 17 +-- 2 files changed, 79 insertions(+), 46 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs index c38a82b42..ab0886619 100644 --- a/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs +++ b/src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs @@ -46,15 +46,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder } } - internal class FromYCbCrSimd256 : JpegColorConverter + internal class FromYCbCrSimd : JpegColorConverter { - public FromYCbCrSimd256() + public FromYCbCrSimd() : base(JpegColorSpace.YCbCr) { } - public static bool IsAvailable => Vector.IsHardwareAccelerated && Vector.Count == 8; - public override void ConvertToRGBA(ComponentValues values, Span result) { int remainder = result.Length % 8; @@ -72,25 +70,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder /// internal static void ConvertCore(ComponentValues values, Span result) { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. 
- if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); - } + DebugGuard.IsTrue(result.Length % 8 == 0, nameof(result), "result.Length should be divisable by 8!"); - ref Vector yBase = - ref Unsafe.As>(ref values.Component0.DangerousGetPinnableReference()); - ref Vector cbBase = - ref Unsafe.As>(ref values.Component1.DangerousGetPinnableReference()); - ref Vector crBase = - ref Unsafe.As>(ref values.Component2.DangerousGetPinnableReference()); + ref Vector4Pair yBase = + ref Unsafe.As(ref values.Component0.DangerousGetPinnableReference()); + ref Vector4Pair cbBase = + ref Unsafe.As(ref values.Component1.DangerousGetPinnableReference()); + ref Vector4Pair crBase = + ref Unsafe.As(ref values.Component2.DangerousGetPinnableReference()); ref Vector4Octet resultBase = ref Unsafe.As(ref result.DangerousGetPinnableReference()); - var chromaOffset = new Vector(-128f); + var chromaOffset = new Vector4(-128f); // Walking 8 elements at one step: int n = result.Length / 8; @@ -100,47 +92,87 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder // y = yVals[i]; // cb = cbVals[i] - 128F; // cr = crVals[i] - 128F; - Vector y = Unsafe.Add(ref yBase, i); - Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; - Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; + Vector4Pair y = Unsafe.Add(ref yBase, i); + Vector4Pair cb = Unsafe.Add(ref cbBase, i); + Vector4Pair cr = Unsafe.Add(ref crBase, i); + cb.AddInplace(chromaOffset); + cr.AddInplace(chromaOffset); // r = y + (1.402F * cr); + Vector4Pair r = y; + Vector4Pair tmp = cr; + tmp.MultiplyInplace(1.402F); + r.AddInplace(ref tmp); + // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector r = y + (cr * new Vector(1.402F)); - Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); - Vector b = y + 
(cb * new Vector(1.772F)); + Vector4Pair g = y; + tmp = cb; + tmp.MultiplyInplace(-0.344136F); + g.AddInplace(ref tmp); + tmp = cr; + tmp.MultiplyInplace(-0.714136F); + g.AddInplace(ref tmp); - // Vector has no .Clamp(), need to switch to Vector4 for the next operation: - // TODO: Is it worth to use Vector at all? - Vector4Pair rr = Unsafe.As, Vector4Pair>(ref r); - Vector4Pair gg = Unsafe.As, Vector4Pair>(ref g); - Vector4Pair bb = Unsafe.As, Vector4Pair>(ref b); + // b = y + (1.772F * cb); + Vector4Pair b = y; + tmp = cb; + tmp.MultiplyInplace(1.772F); + b.AddInplace(ref tmp); - rr.RoundAndDownscale(); - gg.RoundAndDownscale(); - bb.RoundAndDownscale(); + r.RoundAndDownscale(); + g.RoundAndDownscale(); + b.RoundAndDownscale(); // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Collect(ref rr, ref gg, ref bb); + destination.Collect(ref r, ref g, ref b); } } + /// + /// It's faster to process multiple Vector4-s + /// private struct Vector4Pair { public Vector4 A; public Vector4 B; - private static readonly Vector4 Scale = new Vector4(1 / 255F); + private static readonly Vector4 Scale = new Vector4(1 / 255f); + + private static readonly Vector4 Half = new Vector4(0.5f); [MethodImpl(MethodImplOptions.AggressiveInlining)] public void RoundAndDownscale() { - this.A = this.A.PseudoRound() * Scale; - this.B = this.B.PseudoRound() * Scale; + // Emulate rounding: + this.A += Half; + this.B += Half; + + // Downscale by 1/255 + this.A *= Scale; + this.B *= Scale; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void MultiplyInplace(float value) + { + this.A *= value; + this.B *= value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void AddInplace(Vector4 value) + { + this.A += value; + this.B += value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void AddInplace(ref 
Vector4Pair other) + { + this.A += other.A; + this.B += other.B; } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index a414ee977..e9db65105 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -89,16 +89,16 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [InlineData(64, 1)] [InlineData(16, 2)] [InlineData(8, 3)] - public void FromYCbCrSimd256_ConvertCore(int size, int seed) + public void FromYCbCrSimd_ConvertCore(int size, int seed) { - ValidateConversion(JpegColorConverter.FromYCbCrSimd256.ConvertCore, 3, size, size, seed, ValidateYCbCr); + ValidateConversion(JpegColorConverter.FromYCbCrSimd.ConvertCore, 3, size, size, seed, ValidateYCbCr); } [Theory] [MemberData(nameof(CommonConversionData))] - public void FromYCbCrSimd256(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCrSimd(int inputBufferLength, int resultBufferLength, int seed) { - ValidateConversion(new JpegColorConverter.FromYCbCrSimd256(), 3, inputBufferLength, resultBufferLength, seed, ValidateYCbCr); + ValidateConversion(new JpegColorConverter.FromYCbCrSimd(), 3, inputBufferLength, resultBufferLength, seed, ValidateYCbCr); } [Theory] @@ -108,9 +108,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg ValidateConversion(JpegColorSpace.YCbCr, 3, inputBufferLength, resultBufferLength, seed, ValidateYCbCr); } - [Theory] - [InlineData(false)] - [InlineData(true)] + // Benchmark, for local execution only + //[Theory] + //[InlineData(false)] + //[InlineData(true)] public void BenchmarkYCbCr(bool simd) { int count = 2053; @@ -119,7 +120,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); Vector4[] result = new Vector4[count]; - JpegColorConverter converter = simd ? 
(JpegColorConverter)new JpegColorConverter.FromYCbCrSimd256() : new JpegColorConverter.FromYCbCrBasic(); + JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrSimd() : new JpegColorConverter.FromYCbCrBasic(); // Warm up: converter.ConvertToRGBA(values, result);