diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs index f3a0636200..83d4024447 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs @@ -25,7 +25,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters protected override void ConvertCoreVectorized(in ComponentValues values, Span result) { - #if SUPPORTS_RUNTIME_INTRINSICS +#if SUPPORTS_RUNTIME_INTRINSICS ref Vector256 yBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); ref Vector256 cbBase = @@ -94,8 +94,68 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters #endif } + protected override void ConvertCoreVectorizedInplace(in ComponentValues values) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 c0Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 c1Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 c2Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-this.HalfValue); + var scale = Vector256.Create(1 / this.MaximumValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + // Walking 8 elements at one step: + int n = values.Component0.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + ref Vector256 c0 = ref Unsafe.Add(ref c0Base, i); + ref Vector256 c1 = ref Unsafe.Add(ref c1Base, i); + ref Vector256 c2 = ref Unsafe.Add(ref c2Base, i); + + Vector256 y = c0; + Vector256 cb = Avx.Add(c1, chromaOffset); + Vector256 cr = Avx.Add(c2, chromaOffset); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); + g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); + b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); + + c0 = r; + c1 = g; + c2 = b; + } +#endif + } + protected override void ConvertCore(in ComponentValues values, Span result) => FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + + protected override void ConvertCoreInplace(in ComponentValues values) => + FromYCbCrBasic.ConvertCoreInplace(values, this.MaximumValue, this.HalfValue); } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs index 352e4acb7e..cc37e4e7dc 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs @@ -20,6 +20,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ConvertCore(values, result, this.MaximumValue, this.HalfValue); } + public override void ConvertToRgbInplace(in ComponentValues values) + => ConvertCoreInplace(values, this.MaximumValue, this.HalfValue); + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) { // TODO: We can optimize a lot here with Vector and SRCS.Unsafe()! @@ -46,6 +49,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters result[i] = v; } } + + internal static void ConvertCoreInplace(in ComponentValues values, float maxValue, float halfValue) + { + Span c0 = values.Component0; + Span c1 = values.Component1; + Span c2 = values.Component2; + + var scale = 1 / maxValue; + + for (int i = 0; i < c0.Length; i++) + { + float y = c0[i]; + float cb = c1[i] - halfValue; + float cr = c2[i] - halfValue; + + c0[i] = MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero) * scale; + c1[i] = MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero) * scale; + c2[i] = MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero) * scale; + } + } } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs index 42f8eef5a1..c770111cf7 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs @@ -85,8 +85,72 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } + protected override void ConvertCoreVectorizedInplace(in ComponentValues values) + { + DebugGuard.IsTrue(values.Component0.Length % 8 == 0, nameof(values), "Length should be divisible by 8!"); + + ref Vector4Pair c0Base = + ref Unsafe.As(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector4Pair c1Base = + ref Unsafe.As(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector4Pair c2Base = + ref Unsafe.As(ref MemoryMarshal.GetReference(values.Component2)); + + var chromaOffset = new Vector4(-this.HalfValue); + var maxValue = this.MaximumValue; + + // Walking 8 elements at one step: + int n = values.Component0.Length / 8; + + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + ref Vector4Pair c0 = ref Unsafe.Add(ref c0Base, i); + + // cb = cbVals[i] - halfValue); + ref Vector4Pair c1 = ref Unsafe.Add(ref c1Base, i); + c1.AddInplace(chromaOffset); + + // cr = crVals[i] - halfValue; + ref Vector4Pair c2 = ref Unsafe.Add(ref c2Base, i); + c2.AddInplace(chromaOffset); + + // r = y + (1.402F * cr); + Vector4Pair r = c0; + Vector4Pair tmp = c2; + tmp.MultiplyInplace(1.402F); + r.AddInplace(ref tmp); + + // g = y - (0.344136F * cb) - (0.714136F * cr); + Vector4Pair g = c0; + tmp = c1; + tmp.MultiplyInplace(-0.344136F); + g.AddInplace(ref tmp); + tmp = c2; + tmp.MultiplyInplace(-0.714136F); + g.AddInplace(ref tmp); + + // b = y + (1.772F * cb); + Vector4Pair b = c0; + tmp = c1; + tmp.MultiplyInplace(1.772F); + b.AddInplace(ref tmp); + + r.RoundAndDownscalePreVector8(maxValue); + g.RoundAndDownscalePreVector8(maxValue); + b.RoundAndDownscalePreVector8(maxValue); + + c0 = r; + c1 = g; + c2 = b; + } + } + protected override void ConvertCore(in ComponentValues values, Span result) => FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + + protected override void ConvertCoreInplace(in ComponentValues values) + => FromYCbCrBasic.ConvertCoreInplace(values, this.MaximumValue, this.HalfValue); } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs index abacf7161b..f6015447b8 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs @@ -80,8 +80,59 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } + protected override void ConvertCoreVectorizedInplace(in ComponentValues values) + { + ref Vector c0Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector c1Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector c2Base = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + var chromaOffset = new Vector(-this.HalfValue); + + // Walking 8 elements at one step: + int n = values.Component0.Length / 8; + var scale = new Vector(1 / this.MaximumValue); + + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + ref Vector c0 = ref Unsafe.Add(ref c0Base, i); + ref Vector c1 = ref Unsafe.Add(ref c1Base, i); + ref Vector c2 = ref Unsafe.Add(ref c2Base, i); + Vector y = Unsafe.Add(ref c0Base, i); + Vector cb = Unsafe.Add(ref c1Base, i) + chromaOffset; + Vector cr = Unsafe.Add(ref c2Base, i) + chromaOffset; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector r = y + (cr * new Vector(1.402F)); + Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); + Vector b = y + (cb * new Vector(1.772F)); + + r = r.FastRound(); + g = g.FastRound(); + b = b.FastRound(); + r *= scale; + g *= scale; + b *= scale; + + c0 = r; + c1 = g; + c2 = b; + } + } + protected override void ConvertCore(in ComponentValues values, Span result) => FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + + protected override void ConvertCoreInplace(in ComponentValues values) => + FromYCbCrBasic.ConvertCoreInplace(values, this.MaximumValue, this.HalfValue); } } }