From 7229dbf73f6c1898641128b9b9af5728a37ad174 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Tue, 18 May 2021 12:03:42 +0300 Subject: [PATCH 01/99] Block8x8F explicit layout & 256bit rows support --- .../Formats/Jpeg/Components/Block8x8F.cs | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 2d19f5ce2..dbc22eaea 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Represents a Jpeg block with coefficients. /// - [StructLayout(LayoutKind.Sequential)] + [StructLayout(LayoutKind.Explicit)] internal partial struct Block8x8F : IEquatable { /// @@ -27,29 +27,64 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components public const int Size = 64; #pragma warning disable SA1600 // ElementsMustBeDocumented + [FieldOffset(0)] public Vector4 V0L; + [FieldOffset(16)] public Vector4 V0R; + [FieldOffset(32)] public Vector4 V1L; + [FieldOffset(48)] public Vector4 V1R; + [FieldOffset(64)] public Vector4 V2L; + [FieldOffset(80)] public Vector4 V2R; + [FieldOffset(96)] public Vector4 V3L; + [FieldOffset(112)] public Vector4 V3R; + [FieldOffset(128)] public Vector4 V4L; + [FieldOffset(144)] public Vector4 V4R; + [FieldOffset(160)] public Vector4 V5L; + [FieldOffset(176)] public Vector4 V5R; + [FieldOffset(192)] public Vector4 V6L; + [FieldOffset(208)] public Vector4 V6R; + [FieldOffset(224)] public Vector4 V7L; + [FieldOffset(240)] public Vector4 V7R; + +#if SUPPORTS_RUNTIME_INTRINSICS + [FieldOffset(0)] + public Vector256 V0; + [FieldOffset(32)] + public Vector256 V1; + [FieldOffset(64)] + public Vector256 V2; + [FieldOffset(96)] + public Vector256 V3; + [FieldOffset(128)] + public Vector256 V4; + [FieldOffset(160)] + public Vector256 V5; + [FieldOffset(192)] + public Vector256 V6; + 
[FieldOffset(224)] + public Vector256 V7; +#endif #pragma warning restore SA1600 // ElementsMustBeDocumented /// From fbf0ff1466ef410de2fb77d22c6cdef074cad6ce Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Tue, 18 May 2021 12:08:26 +0300 Subject: [PATCH 02/99] Block8x8F.MultiplyInPlace no longer use unsafe casts Improved performance, no need for Unsafe calls. --- .../Formats/Jpeg/Components/Block8x8F.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index dbc22eaea..52a1a7aa9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -313,14 +313,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components if (Avx.IsSupported) { var valueVec = Vector256.Create(value); - Unsafe.As>(ref this.V0L) = Avx.Multiply(Unsafe.As>(ref this.V0L), valueVec); - Unsafe.As>(ref this.V1L) = Avx.Multiply(Unsafe.As>(ref this.V1L), valueVec); - Unsafe.As>(ref this.V2L) = Avx.Multiply(Unsafe.As>(ref this.V2L), valueVec); - Unsafe.As>(ref this.V3L) = Avx.Multiply(Unsafe.As>(ref this.V3L), valueVec); - Unsafe.As>(ref this.V4L) = Avx.Multiply(Unsafe.As>(ref this.V4L), valueVec); - Unsafe.As>(ref this.V5L) = Avx.Multiply(Unsafe.As>(ref this.V5L), valueVec); - Unsafe.As>(ref this.V6L) = Avx.Multiply(Unsafe.As>(ref this.V6L), valueVec); - Unsafe.As>(ref this.V7L) = Avx.Multiply(Unsafe.As>(ref this.V7L), valueVec); + this.V0 = Avx.Multiply(this.V0, valueVec); + this.V1 = Avx.Multiply(this.V1, valueVec); + this.V2 = Avx.Multiply(this.V2, valueVec); + this.V3 = Avx.Multiply(this.V3, valueVec); + this.V4 = Avx.Multiply(this.V4, valueVec); + this.V5 = Avx.Multiply(this.V5, valueVec); + this.V6 = Avx.Multiply(this.V6, valueVec); + this.V7 = Avx.Multiply(this.V7, valueVec); } else #endif From 20236b8c756ecbd6fd75c789b58dca5ed028d1e9 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Tue, 
18 May 2021 12:18:37 +0300 Subject: [PATCH 03/99] Block8x8F.TransposeInto no longer uses unsafe casts (partially) --- .../Formats/Jpeg/Components/Block8x8F.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 52a1a7aa9..9072ca196 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -840,26 +840,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Vector256 t0 = Avx.UnpackLow(r0, r1); Vector256 t2 = Avx.UnpackLow(r2, r3); Vector256 v = Avx.Shuffle(t0, t2, 0x4E); - Unsafe.As>(ref d.V0L) = Avx.Blend(t0, v, 0xCC); - Unsafe.As>(ref d.V1L) = Avx.Blend(t2, v, 0x33); + d.V0 = Avx.Blend(t0, v, 0xCC); + d.V1 = Avx.Blend(t2, v, 0x33); Vector256 t4 = Avx.UnpackLow(r4, r5); Vector256 t6 = Avx.UnpackLow(r6, r7); v = Avx.Shuffle(t4, t6, 0x4E); - Unsafe.As>(ref d.V4L) = Avx.Blend(t4, v, 0xCC); - Unsafe.As>(ref d.V5L) = Avx.Blend(t6, v, 0x33); + d.V4 = Avx.Blend(t4, v, 0xCC); + d.V5 = Avx.Blend(t6, v, 0x33); Vector256 t1 = Avx.UnpackHigh(r0, r1); Vector256 t3 = Avx.UnpackHigh(r2, r3); v = Avx.Shuffle(t1, t3, 0x4E); - Unsafe.As>(ref d.V2L) = Avx.Blend(t1, v, 0xCC); - Unsafe.As>(ref d.V3L) = Avx.Blend(t3, v, 0x33); + d.V2 = Avx.Blend(t1, v, 0xCC); + d.V3 = Avx.Blend(t3, v, 0x33); Vector256 t5 = Avx.UnpackHigh(r4, r5); Vector256 t7 = Avx.UnpackHigh(r6, r7); v = Avx.Shuffle(t5, t7, 0x4E); - Unsafe.As>(ref d.V6L) = Avx.Blend(t5, v, 0xCC); - Unsafe.As>(ref d.V7L) = Avx.Blend(t7, v, 0x33); + d.V6 = Avx.Blend(t5, v, 0xCC); + d.V7 = Avx.Blend(t7, v, 0x33); } else #endif From e5188fe4f4b2060ed3329d696d4efb16bb7a51ca Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Tue, 18 May 2021 12:56:53 +0300 Subject: [PATCH 04/99] Implemented FDCT8x8 using avx instruction set, added backward compatibility for FDCT8x4 calls using FDCT8x8(ref Block8x8F, ref Block8x8F) method --- 
.../Jpeg/Components/FastFloatingPointDCT.cs | 120 +++++++++++++++++- 1 file changed, 114 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index a6d0622dd..ad47aa05f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -3,6 +3,10 @@ using System.Numerics; using System.Runtime.CompilerServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Formats.Jpeg.Components @@ -38,6 +42,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components private const float C_0_765367 = 0.765366865f; private const float C_0_125 = 0.1250f; + +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector256 C_V_0_5411 = Vector256.Create(0.541196f); + private static readonly Vector256 C_V_1_3065 = Vector256.Create(1.306563f); + private static readonly Vector256 C_V_1_1758 = Vector256.Create(1.175876f); + private static readonly Vector256 C_V_0_7856 = Vector256.Create(0.785695f); + private static readonly Vector256 C_V_1_3870 = Vector256.Create(1.387040f); + private static readonly Vector256 C_V_0_2758 = Vector256.Create(0.275899f); + + private static Vector256 C_V_InvSqrt2 = Vector256.Create(0.707107f); +#endif #pragma warning restore SA1310 // FieldNamesMustNotContainUnderscore private static readonly Vector4 InvSqrt2 = new Vector4(0.707107f); @@ -308,12 +323,107 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components d.V7R = c0 - c3; } +#if SUPPORTS_RUNTIME_INTRINSICS + /// + /// + /// + /// Source + /// Destination + private static void FDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d) + { + Vector256 t0 = Avx.Add(s.V0, s.V7); + Vector256 t7 = Avx.Subtract(s.V0, s.V7); + Vector256 t1 = Avx.Add(s.V1, s.V6); + Vector256 t6 = 
Avx.Subtract(s.V1, s.V6); + Vector256 t2 = Avx.Add(s.V2, s.V5); + Vector256 t5 = Avx.Subtract(s.V2, s.V5); + Vector256 t3 = Avx.Add(s.V3, s.V4); + Vector256 t4 = Avx.Subtract(s.V3, s.V4); + + Vector256 c0 = Avx.Add(t0, t3); + Vector256 c1 = Avx.Add(t1, t2); + + // 0 4 + d.V0 = Avx.Add(c0, c1); + d.V4 = Avx.Subtract(c0, c1); + + Vector256 c3 = Avx.Subtract(t0, t3); + Vector256 c2 = Avx.Subtract(t1, t2); + + // 2 6 + if (Fma.IsSupported) + { + d.V2 = Fma.MultiplyAdd(c2, C_V_0_5411, Avx.Multiply(c3, C_V_1_3065)); + d.V6 = Fma.MultiplySubtract(c3, C_V_0_5411, Avx.Multiply(c2, C_V_1_3065)); + } + else + { + d.V2 = Avx.Add(Avx.Multiply(c2, C_V_0_5411), Avx.Multiply(c3, C_V_1_3065)); + d.V6 = Avx.Subtract(Avx.Multiply(c3, C_V_0_5411), Avx.Multiply(c2, C_V_1_3065)); + } + + if (Fma.IsSupported) + { + c3 = Fma.MultiplyAdd(t4, C_V_1_1758, Avx.Multiply(t7, C_V_0_7856)); + c0 = Fma.MultiplySubtract(t7, C_V_1_1758, Avx.Multiply(t4, C_V_0_7856)); + } + else + { + c3 = Avx.Add(Avx.Multiply(t4, C_V_1_1758), Avx.Multiply(t7, C_V_0_7856)); + c0 = Avx.Subtract(Avx.Multiply(t7, C_V_1_1758), Avx.Multiply(t4, C_V_0_7856)); + } + + if (Fma.IsSupported) + { + c2 = Fma.MultiplyAdd(t5, C_V_1_3870, Avx.Multiply(C_V_0_2758, t6)); + c1 = Fma.MultiplySubtract(t6, C_V_1_3870, Avx.Multiply(C_V_0_2758, t5)); + } + else + { + c2 = Avx.Add(Avx.Multiply(t5, C_V_1_3870), Avx.Multiply(C_V_0_2758, t6)); + c1 = Avx.Subtract(Avx.Multiply(t6, C_V_1_3870), Avx.Multiply(C_V_0_2758, t5)); + } + + // 3 5 + d.V3 = Avx.Subtract(c0, c2); + d.V5 = Avx.Subtract(c3, c1); + + c0 = Avx.Multiply(Avx.Add(c0, c2), C_V_InvSqrt2); + c3 = Avx.Multiply(Avx.Add(c3, c1), C_V_InvSqrt2); + + // 1 7 + d.V1 = Avx.Add(c0, c3); + d.V7 = Avx.Subtract(c0, c3); + } +#endif + /// - /// Apply floating point IDCT transformation into dest, using a temporary block 'temp' provided by the caller (optimization) + /// Performs 8x8 matrix Forward Discrete Cosine Transform /// + /// Source + /// Destination + public static void FDCT8x8(ref 
Block8x8F s, ref Block8x8F d) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + FDCT8x8_Avx(ref s, ref d); + } + else +#endif + { + FDCT8x4_LeftPart(ref s, ref d); + FDCT8x4_RightPart(ref s, ref d); + } + } + + /// + /// Apply floating point FDCT from src into dest + /// + /// /// Source /// Destination - /// Temporary block provided by the caller + /// Temporary block provided by the caller for optimization /// If true, a constant -128.0 offset is applied for all values before FDCT public static void TransformFDCT( ref Block8x8F src, @@ -327,13 +437,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components temp.AddInPlace(-128F); } - FDCT8x4_LeftPart(ref temp, ref dest); - FDCT8x4_RightPart(ref temp, ref dest); + FDCT8x8(ref temp, ref dest); dest.TransposeInto(ref temp); - FDCT8x4_LeftPart(ref temp, ref dest); - FDCT8x4_RightPart(ref temp, ref dest); + FDCT8x8(ref temp, ref dest); dest.MultiplyInPlace(C_0_125); } From 513e86a904d2352bfb23773aafd221cab71711f8 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Tue, 18 May 2021 15:37:14 +0300 Subject: [PATCH 05/99] Implemented IDCT algorithm with avx/fma, move IDCT code to a different file --- .../Components/FastFloatingPointDCT.IDCT.cs | 263 ++++++++++++++++++ .../Jpeg/Components/FastFloatingPointDCT.cs | 151 +--------- 2 files changed, 275 insertions(+), 139 deletions(-) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs new file mode 100644 index 000000000..1c990db6b --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs @@ -0,0 +1,263 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System.Numerics; +using System.Runtime.CompilerServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif + +// ReSharper disable InconsistentNaming +namespace SixLabors.ImageSharp.Formats.Jpeg.Components +{ + /// + /// Contains inaccurate, but fast forward and inverse DCT implementations. + /// + internal static partial class FastFloatingPointDCT + { + /// + /// Apply floating point IDCT transformation into dest, using a temporary block 'temp' provided by the caller (optimization). + /// Ported from https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L239 + /// + /// Source + /// Destination + /// Temporary block provided by the caller + public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp) + { + src.TransposeInto(ref temp); + + IDCT8x8(ref temp, ref dest); + dest.TransposeInto(ref temp); + IDCT8x8(ref temp, ref dest); + + // TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing? + dest.MultiplyInPlace(C_0_125); + } + + /// + /// Performs 8x8 matrix Inverse Discrete Cosine Transform + /// + /// Source + /// Destination + public static void IDCT8x8(ref Block8x8F s, ref Block8x8F d) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + IDCT8x8_Avx(ref s, ref d); + } + else +#endif + { + IDCT8x4_LeftPart(ref s, ref d); + IDCT8x4_RightPart(ref s, ref d); + } + } + + /// + /// Do IDCT internal operations on the left part of the block. 
Original src: + /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 + /// + /// The source block + /// Destination block + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void IDCT8x4_LeftPart(ref Block8x8F s, ref Block8x8F d) + { + Vector4 my1 = s.V1L; + Vector4 my7 = s.V7L; + Vector4 mz0 = my1 + my7; + + Vector4 my3 = s.V3L; + Vector4 mz2 = my3 + my7; + Vector4 my5 = s.V5L; + Vector4 mz1 = my3 + my5; + Vector4 mz3 = my1 + my5; + + Vector4 mz4 = (mz0 + mz1) * C_1_175876; + + mz2 = (mz2 * C_1_961571) + mz4; + mz3 = (mz3 * C_0_390181) + mz4; + mz0 = mz0 * C_0_899976; + mz1 = mz1 * C_2_562915; + + Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2; + Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3; + Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2; + Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3; + + Vector4 my2 = s.V2L; + Vector4 my6 = s.V6L; + mz4 = (my2 + my6) * C_0_541196; + Vector4 my0 = s.V0L; + Vector4 my4 = s.V4L; + mz0 = my0 + my4; + mz1 = my0 - my4; + + mz2 = mz4 + (my6 * C_1_847759); + mz3 = mz4 + (my2 * C_0_765367); + + my0 = mz0 + mz3; + my3 = mz0 - mz3; + my1 = mz1 + mz2; + my2 = mz1 - mz2; + + d.V0L = my0 + mb0; + d.V7L = my0 - mb0; + d.V1L = my1 + mb1; + d.V6L = my1 - mb1; + d.V2L = my2 + mb2; + d.V5L = my2 - mb2; + d.V3L = my3 + mb3; + d.V4L = my3 - mb3; + } + + /// + /// Do IDCT internal operations on the right part of the block. 
+ /// Original src: + /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 + /// + /// The source block + /// The destination block + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void IDCT8x4_RightPart(ref Block8x8F s, ref Block8x8F d) + { + Vector4 my1 = s.V1R; + Vector4 my7 = s.V7R; + Vector4 mz0 = my1 + my7; + + Vector4 my3 = s.V3R; + Vector4 mz2 = my3 + my7; + Vector4 my5 = s.V5R; + Vector4 mz1 = my3 + my5; + Vector4 mz3 = my1 + my5; + + Vector4 mz4 = (mz0 + mz1) * C_1_175876; + + mz2 = (mz2 * C_1_961571) + mz4; + mz3 = (mz3 * C_0_390181) + mz4; + mz0 = mz0 * C_0_899976; + mz1 = mz1 * C_2_562915; + + Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2; + Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3; + Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2; + Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3; + + Vector4 my2 = s.V2R; + Vector4 my6 = s.V6R; + mz4 = (my2 + my6) * C_0_541196; + Vector4 my0 = s.V0R; + Vector4 my4 = s.V4R; + mz0 = my0 + my4; + mz1 = my0 - my4; + + mz2 = mz4 + (my6 * C_1_847759); + mz3 = mz4 + (my2 * C_0_765367); + + my0 = mz0 + mz3; + my3 = mz0 - mz3; + my1 = mz1 + mz2; + my2 = mz1 - mz2; + + d.V0R = my0 + mb0; + d.V7R = my0 - mb0; + d.V1R = my1 + mb1; + d.V6R = my1 - mb1; + d.V2R = my2 + mb2; + d.V5R = my2 - mb2; + d.V3R = my3 + mb3; + d.V4R = my3 - mb3; + } + +#if SUPPORTS_RUNTIME_INTRINSICS + /// + /// Do IDCT internal operations on the given block. 
+ /// + /// Source + /// Destination + public static void IDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d) + { + Vector256 my1 = s.V1; + Vector256 my7 = s.V7; + Vector256 mz0 = Avx.Add(my1, my7); + + Vector256 my3 = s.V3; + Vector256 mz2 = Avx.Add(my3, my7); + Vector256 my5 = s.V5; + Vector256 mz1 = Avx.Add(my3, my5); + Vector256 mz3 = Avx.Add(my1, my5); + + Vector256 mz4 = Avx.Multiply(Avx.Add(mz0, mz1), w1_1758); + + if (Fma.IsSupported) + { + mz2 = Fma.MultiplyAdd(mz2, C_V_n1_9615, mz4); + mz3 = Fma.MultiplyAdd(mz3, C_V_n0_3901, mz4); + } + else + { + mz2 = Avx.Add(Avx.Multiply(mz2, C_V_n1_9615), mz4); + mz3 = Avx.Add(Avx.Multiply(mz3, C_V_n0_3901), mz4); + } + + mz0 = Avx.Multiply(mz0, C_V_n0_8999); + mz1 = Avx.Multiply(mz1, C_V_n2_5629); + + + Unsafe.SkipInit(out Vector256 mb3); + Unsafe.SkipInit(out Vector256 mb2); + Unsafe.SkipInit(out Vector256 mb1); + Unsafe.SkipInit(out Vector256 mb0); + + if (Fma.IsSupported) + { + mb3 = Avx.Add(Fma.MultiplyAdd(my7, C_V_0_2986, mz0), mz2); + mb2 = Avx.Add(Fma.MultiplyAdd(my5, C_V_2_0531, mz1), mz3); + mb1 = Avx.Add(Fma.MultiplyAdd(my3, C_V_3_0727, mz1), mz2); + mb0 = Avx.Add(Fma.MultiplyAdd(my1, C_V_1_5013, mz0), mz3); + } + else + { + mb3 = Avx.Add(Avx.Add(Avx.Multiply(my7, C_V_0_2986), mz0), mz2); + mb2 = Avx.Add(Avx.Add(Avx.Multiply(my5, C_V_2_0531), mz1), mz3); + mb1 = Avx.Add(Avx.Add(Avx.Multiply(my3, C_V_3_0727), mz1), mz2); + mb0 = Avx.Add(Avx.Add(Avx.Multiply(my1, C_V_1_5013), mz0), mz3); + } + + Vector256 my2 = s.V2; + Vector256 my6 = s.V6; + mz4 = Avx.Multiply(Avx.Add(my2, my6), w0_5411); + Vector256 my0 = s.V0; + Vector256 my4 = s.V4; + mz0 = Avx.Add(my0, my4); + mz1 = Avx.Subtract(my0, my4); + + if (Fma.IsSupported) + { + mz2 = Fma.MultiplyAdd(my6, C_V_n1_8477, mz4); + mz3 = Fma.MultiplyAdd(my2, C_V_0_7653, mz4); + } + else + { + mz2 = Avx.Add(Avx.Multiply(my6, C_V_n1_8477), mz4); + mz3 = Avx.Add(Avx.Multiply(my2, C_V_0_7653), mz4); + } + + my0 = Avx.Add(mz0, mz3); + my3 = Avx.Subtract(mz0, mz3); + my1 = 
Avx.Add(mz1, mz2); + my2 = Avx.Subtract(mz1, mz2); + + d.V0 = Avx.Add(my0, mb0); + d.V7 = Avx.Subtract(my0, mb0); + d.V1 = Avx.Add(my1, mb1); + d.V6 = Avx.Subtract(my1, mb1); + d.V2 = Avx.Add(my2, mb2); + d.V5 = Avx.Subtract(my2, mb2); + d.V3 = Avx.Add(my3, mb3); + d.V4 = Avx.Subtract(my3, mb3); + } +#endif + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index ad47aa05f..4ef4ab7b0 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -14,7 +14,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Contains inaccurate, but fast forward and inverse DCT implementations. /// - internal static class FastFloatingPointDCT + internal static partial class FastFloatingPointDCT { #pragma warning disable SA1310 // FieldNamesMustNotContainUnderscore private const float C_1_175876 = 1.175875602f; @@ -51,149 +51,22 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components private static readonly Vector256 C_V_1_3870 = Vector256.Create(1.387040f); private static readonly Vector256 C_V_0_2758 = Vector256.Create(0.275899f); + private static readonly Vector256 C_V_n1_9615 = Vector256.Create(-1.961570560f); + private static readonly Vector256 C_V_n0_3901 = Vector256.Create(-0.390180644f); + private static readonly Vector256 C_V_n0_8999 = Vector256.Create(-0.899976223f); + private static readonly Vector256 C_V_n2_5629 = Vector256.Create(-2.562915447f); + private static readonly Vector256 C_V_0_2986 = Vector256.Create(0.298631336f); + private static readonly Vector256 C_V_2_0531 = Vector256.Create(2.053119869f); + private static readonly Vector256 C_V_3_0727 = Vector256.Create(3.072711026f); + private static readonly Vector256 C_V_1_5013 = Vector256.Create(1.501321110f); + private static readonly Vector256 C_V_n1_8477 = Vector256.Create(-1.847759065f); + private static readonly 
Vector256 C_V_0_7653 = Vector256.Create(0.765366865f); + private static Vector256 C_V_InvSqrt2 = Vector256.Create(0.707107f); #endif #pragma warning restore SA1310 // FieldNamesMustNotContainUnderscore private static readonly Vector4 InvSqrt2 = new Vector4(0.707107f); - /// - /// Apply floating point IDCT transformation into dest, using a temporary block 'temp' provided by the caller (optimization). - /// Ported from https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L239 - /// - /// Source - /// Destination - /// Temporary block provided by the caller - public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp) - { - src.TransposeInto(ref temp); - - IDCT8x4_LeftPart(ref temp, ref dest); - IDCT8x4_RightPart(ref temp, ref dest); - - dest.TransposeInto(ref temp); - - IDCT8x4_LeftPart(ref temp, ref dest); - IDCT8x4_RightPart(ref temp, ref dest); - - // TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing? - dest.MultiplyInPlace(C_0_125); - } - - /// - /// Do IDCT internal operations on the left part of the block. 
Original src: - /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 - /// - /// The source block - /// Destination block - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void IDCT8x4_LeftPart(ref Block8x8F s, ref Block8x8F d) - { - Vector4 my1 = s.V1L; - Vector4 my7 = s.V7L; - Vector4 mz0 = my1 + my7; - - Vector4 my3 = s.V3L; - Vector4 mz2 = my3 + my7; - Vector4 my5 = s.V5L; - Vector4 mz1 = my3 + my5; - Vector4 mz3 = my1 + my5; - - Vector4 mz4 = (mz0 + mz1) * C_1_175876; - - mz2 = (mz2 * C_1_961571) + mz4; - mz3 = (mz3 * C_0_390181) + mz4; - mz0 = mz0 * C_0_899976; - mz1 = mz1 * C_2_562915; - - Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2; - Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3; - Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2; - Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3; - - Vector4 my2 = s.V2L; - Vector4 my6 = s.V6L; - mz4 = (my2 + my6) * C_0_541196; - Vector4 my0 = s.V0L; - Vector4 my4 = s.V4L; - mz0 = my0 + my4; - mz1 = my0 - my4; - - mz2 = mz4 + (my6 * C_1_847759); - mz3 = mz4 + (my2 * C_0_765367); - - my0 = mz0 + mz3; - my3 = mz0 - mz3; - my1 = mz1 + mz2; - my2 = mz1 - mz2; - - d.V0L = my0 + mb0; - d.V7L = my0 - mb0; - d.V1L = my1 + mb1; - d.V6L = my1 - mb1; - d.V2L = my2 + mb2; - d.V5L = my2 - mb2; - d.V3L = my3 + mb3; - d.V4L = my3 - mb3; - } - - /// - /// Do IDCT internal operations on the right part of the block. 
- /// Original src: - /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 - /// - /// The source block - /// The destination block - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void IDCT8x4_RightPart(ref Block8x8F s, ref Block8x8F d) - { - Vector4 my1 = s.V1R; - Vector4 my7 = s.V7R; - Vector4 mz0 = my1 + my7; - - Vector4 my3 = s.V3R; - Vector4 mz2 = my3 + my7; - Vector4 my5 = s.V5R; - Vector4 mz1 = my3 + my5; - Vector4 mz3 = my1 + my5; - - Vector4 mz4 = (mz0 + mz1) * C_1_175876; - - mz2 = (mz2 * C_1_961571) + mz4; - mz3 = (mz3 * C_0_390181) + mz4; - mz0 = mz0 * C_0_899976; - mz1 = mz1 * C_2_562915; - - Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2; - Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3; - Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2; - Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3; - - Vector4 my2 = s.V2R; - Vector4 my6 = s.V6R; - mz4 = (my2 + my6) * C_0_541196; - Vector4 my0 = s.V0R; - Vector4 my4 = s.V4R; - mz0 = my0 + my4; - mz1 = my0 - my4; - - mz2 = mz4 + (my6 * C_1_847759); - mz3 = mz4 + (my2 * C_0_765367); - - my0 = mz0 + mz3; - my3 = mz0 - mz3; - my1 = mz1 + mz2; - my2 = mz1 - mz2; - - d.V0R = my0 + mb0; - d.V7R = my0 - mb0; - d.V1R = my1 + mb1; - d.V6R = my1 - mb1; - d.V2R = my2 + mb2; - d.V5R = my2 - mb2; - d.V3R = my3 + mb3; - d.V4R = my3 - mb3; - } - /// /// Original: /// From 81c21e5af42088dccea6ce40115034cc84d928f2 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Tue, 18 May 2021 15:50:24 +0300 Subject: [PATCH 06/99] Fixed "constant" vectors naming --- .../Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs index 1c990db6b..fd3ad8d5f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs +++ 
b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; using System.Numerics; using System.Runtime.CompilerServices; #if SUPPORTS_RUNTIME_INTRINSICS @@ -188,7 +189,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Vector256 mz1 = Avx.Add(my3, my5); Vector256 mz3 = Avx.Add(my1, my5); - Vector256 mz4 = Avx.Multiply(Avx.Add(mz0, mz1), w1_1758); + Vector256 mz4 = Avx.Multiply(Avx.Add(mz0, mz1), C_V_1_1758); if (Fma.IsSupported) { @@ -227,7 +228,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Vector256 my2 = s.V2; Vector256 my6 = s.V6; - mz4 = Avx.Multiply(Avx.Add(my2, my6), w0_5411); + mz4 = Avx.Multiply(Avx.Add(my2, my6), C_V_0_5411); Vector256 my0 = s.V0; Vector256 my4 = s.V4; mz0 = Avx.Add(my0, my4); From 9bf9644e650b2a67b324e37506e56b435bc2676e Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 20 May 2021 09:41:58 +0300 Subject: [PATCH 07/99] RgbToYCbCrConverterLut.Convert main loop routine now uses named constant instead of a 'magic value' --- .../Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index 3c1a02c5a..1ceea1e08 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -119,7 +119,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { ref Rgb24 rgbStart = ref rgbSpan[0]; - for (int i = 0; i < 64; i++) + for (int i = 0; i < Block8x8F.Size; i++) { ref Rgb24 c = ref Unsafe.Add(ref rgbStart, i); From 347ac360ec56e0e63ec97ba32f05d5bf8ea35b32 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 20 May 2021 14:09:32 +0300 Subject: [PATCH 08/99] 
LuminanceForwardConverter.Convert main loop routine now uses named constant instead of a 'magic value' --- .../Components/Encoder/LuminanceForwardConverter{TPixel}.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs index cc81130dd..fc5b9a868 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs @@ -49,7 +49,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref Block8x8F yBlock = ref this.Y; ref L8 l8Start = ref l8Span[0]; - for (int i = 0; i < 64; i++) + for (int i = 0; i < Block8x8F.Size; i++) { ref L8 c = ref Unsafe.Add(ref l8Start, i); yBlock[i] = c.PackedValue; From 86a6d8be975df1ec74963b3201a4b10eaa8aef51 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 20 May 2021 16:06:13 +0300 Subject: [PATCH 09/99] WriteDefineHuffmanTables(...) no longer relies on external buffer for stream writes --- .../Formats/Jpeg/JpegEncoderCore.cs | 44 ++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index f5dc1c79f..79f0d3022 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -41,12 +41,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg /// private readonly byte[] emitBuffer = new byte[64]; - /// - /// A buffer for reducing the number of stream writes when emitting Huffman tables. Max combined table lengths + - /// identifier. - /// - private readonly byte[] huffmanBuffer = new byte[179]; - /// /// Gets or sets the subsampling method to use. 
/// @@ -635,30 +629,40 @@ namespace SixLabors.ImageSharp.Formats.Jpeg markerlen += 1 + 16 + s.Values.Length; } + // TODO: this magic constant (array size) should be defined by HuffmanSpec class + // This is a one-time call which can be stackalloc'ed or allocated directly in memory as method local array + // Allocation here would be better for GC so it won't live for entire encoding process + // TODO: if this is allocated on the heap - pin it right here or following copy code will corrupt memory + Span huffmanBuffer = stackalloc byte[179]; + byte* huffmanBufferPtr = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(huffmanBuffer)); + this.WriteMarkerHeader(JpegConstants.Markers.DHT, markerlen); for (int i = 0; i < specs.Length; i++) { ref HuffmanSpec spec = ref specs[i]; + int len = 0; - fixed (byte* huffman = this.huffmanBuffer) - fixed (byte* count = spec.Count) - fixed (byte* values = spec.Values) - { - huffman[len++] = headers[i]; + // header + huffmanBuffer[len++] = headers[i]; - for (int c = 0; c < spec.Count.Length; c++) - { - huffman[len++] = count[c]; - } + // count + fixed (byte* countPtr = spec.Count) + { + int countLen = spec.Count.Length; + Unsafe.CopyBlockUnaligned(huffmanBufferPtr + len, countPtr, (uint)countLen); + len += countLen; + } - for (int v = 0; v < spec.Values.Length; v++) - { - huffman[len++] = values[v]; - } + // values + fixed (byte* valuesPtr = spec.Values) + { + int valuesLen = spec.Values.Length; + Unsafe.CopyBlockUnaligned(huffmanBufferPtr + len, valuesPtr, (uint)valuesLen); + len += valuesLen; } - this.outputStream.Write(this.huffmanBuffer, 0, len); + this.outputStream.Write(huffmanBuffer, 0, len); } } From f0017556cf06ee0d881b723f1fd6277b858732e4 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 20 May 2021 16:46:55 +0300 Subject: [PATCH 10/99] [WIP] Partially moved encoding logic to a separate class --- .../Encoder/YCbCrEncoder{TPixel}.cs | 532 ++++++++++++++++++ .../Formats/Jpeg/JpegEncoderCore.cs | 28 +- 2 files 
changed, 539 insertions(+), 21 deletions(-) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs new file mode 100644 index 000000000..2ef053eb1 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs @@ -0,0 +1,532 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Runtime.CompilerServices; +using System.Text; +using System.Threading; +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder +{ + internal class YCbCrEncoder + { + /// + /// A buffer for reducing the number of stream writes when emitting Huffman tables. 64 seems to be enough. + /// + private byte[] emitBuffer = new byte[64]; + + /// + /// The accumulated bits to write to the stream. + /// + private uint accumulatedBits; + + /// + /// The accumulated bit count. + /// + private uint bitCount; + + /// + /// The scaled chrominance table, in zig-zag order. + /// + private Block8x8F chrominanceQuantTable; + + /// + /// The scaled luminance table, in zig-zag order. + /// + private Block8x8F luminanceQuantTable; + + /// + /// The output stream. All attempted writes after the first error become no-ops. + /// + private Stream outputStream; + + /// + /// Gets the counts the number of bits needed to hold an integer. + /// + // The C# compiler emits this as a compile-time constant embedded in the PE file. 
+ // This is effectively compiled down to: return new ReadOnlySpan(&data, length) + // More details can be found: https://github.com/dotnet/roslyn/pull/24621 + private static ReadOnlySpan BitCountLut => new byte[] + { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, + }; + + /// + /// Gets the unscaled quantization tables in zig-zag order. Each + /// encoder copies and scales the tables according to its quality parameter. + /// The values are derived from section K.1 after converting from natural to + /// zig-zag order. + /// + // The C# compiler emits this as a compile-time constant embedded in the PE file. + // This is effectively compiled down to: return new ReadOnlySpan(&data, length) + // More details can be found: https://github.com/dotnet/roslyn/pull/24621 + private static ReadOnlySpan UnscaledQuant_Luminance => new byte[] + { + // Luminance. + 16, 11, 12, 14, 12, 10, 16, 14, 13, 14, 18, 17, 16, 19, 24, + 40, 26, 24, 22, 22, 24, 49, 35, 37, 29, 40, 58, 51, 61, 60, + 57, 51, 56, 55, 64, 72, 92, 78, 64, 68, 87, 69, 55, 56, 80, + 109, 81, 87, 95, 98, 103, 104, 103, 62, 77, 113, 121, 112, + 100, 120, 92, 101, 103, 99, + }; + + /// + /// Gets the unscaled quantization tables in zig-zag order. 
Each + /// encoder copies and scales the tables according to its quality parameter. + /// The values are derived from section K.1 after converting from natural to + /// zig-zag order. + /// + // The C# compiler emits this as a compile-time constant embedded in the PE file. + // This is effectively compiled down to: return new ReadOnlySpan(&data, length) + // More details can be found: https://github.com/dotnet/roslyn/pull/24621 + private static ReadOnlySpan UnscaledQuant_Chrominance => new byte[] + { + // Chrominance. + 17, 18, 18, 24, 21, 24, 47, 26, 26, 47, 99, 66, 56, 66, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + }; + + + public ref Block8x8F ChrominanceQuantizationTable => ref this.chrominanceQuantTable; + + public ref Block8x8F LuminanceQuantizationTable => ref this.luminanceQuantTable; + + + public YCbCrEncoder(Stream outputStream, int componentCount, int quality) + { + this.outputStream = outputStream; + + // Convert from a quality rating to a scaling factor. + int scale; + if (quality < 50) + { + scale = 5000 / quality; + } + else + { + scale = 200 - (quality * 2); + } + + // Initialize the quantization tables. + InitQuantizationTable(0, scale, ref this.luminanceQuantTable); + if (componentCount > 1) + { + InitQuantizationTable(1, scale, ref this.chrominanceQuantTable); + } + } + + /// + /// Encodes the image with no subsampling. + /// + /// The pixel format. + /// The pixel accessor providing access to the image pixels. + /// The token to monitor for cancellation. + /// The reference to the emit buffer. + public void Encode444(Image pixels, CancellationToken cancellationToken, ref byte emitBufferBase) + where TPixel : unmanaged, IPixel + { + // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) 
+ // (Partially done with YCbCrForwardConverter) + Block8x8F temp1 = default; + Block8x8F temp2 = default; + + Block8x8F onStackLuminanceQuantTable = this.luminanceQuantTable; + Block8x8F onStackChrominanceQuantTable = this.chrominanceQuantTable; + + var unzig = ZigZag.CreateUnzigTable(); + + // ReSharper disable once InconsistentNaming + int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; + + var pixelConverter = YCbCrForwardConverter.Create(); + ImageFrame frame = pixels.Frames.RootFrame; + Buffer2D pixelBuffer = frame.PixelBuffer; + RowOctet currentRows = default; + + for (int y = 0; y < pixels.Height; y += 8) + { + cancellationToken.ThrowIfCancellationRequested(); + currentRows.Update(pixelBuffer, y); + + for (int x = 0; x < pixels.Width; x += 8) + { + pixelConverter.Convert(frame, x, y, ref currentRows); + + prevDCY = this.WriteBlock( + QuantIndex.Luminance, + prevDCY, + ref pixelConverter.Y, + ref temp1, + ref temp2, + ref onStackLuminanceQuantTable, + ref unzig, + ref emitBufferBase); + + prevDCCb = this.WriteBlock( + QuantIndex.Chrominance, + prevDCCb, + ref pixelConverter.Cb, + ref temp1, + ref temp2, + ref onStackChrominanceQuantTable, + ref unzig, + ref emitBufferBase); + + prevDCCr = this.WriteBlock( + QuantIndex.Chrominance, + prevDCCr, + ref pixelConverter.Cr, + ref temp1, + ref temp2, + ref onStackChrominanceQuantTable, + ref unzig, + ref emitBufferBase); + } + } + } + + /// + /// Encodes the image with subsampling. The Cb and Cr components are each subsampled + /// at a factor of 2 both horizontally and vertically. + /// + /// The pixel format. + /// The pixel accessor providing access to the image pixels. + /// The token to monitor for cancellation. + /// The reference to the emit buffer. + public void Encode420(Image pixels, CancellationToken cancellationToken, ref byte emitBufferBase) + where TPixel : unmanaged, IPixel + { + // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) 
+ Block8x8F b = default; + Span cb = stackalloc Block8x8F[4]; + Span cr = stackalloc Block8x8F[4]; + + Block8x8F temp1 = default; + Block8x8F temp2 = default; + + Block8x8F onStackLuminanceQuantTable = this.luminanceQuantTable; + Block8x8F onStackChrominanceQuantTable = this.chrominanceQuantTable; + + var unzig = ZigZag.CreateUnzigTable(); + + var pixelConverter = YCbCrForwardConverter.Create(); + + // ReSharper disable once InconsistentNaming + int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; + ImageFrame frame = pixels.Frames.RootFrame; + Buffer2D pixelBuffer = frame.PixelBuffer; + RowOctet currentRows = default; + + for (int y = 0; y < pixels.Height; y += 16) + { + cancellationToken.ThrowIfCancellationRequested(); + for (int x = 0; x < pixels.Width; x += 16) + { + for (int i = 0; i < 4; i++) + { + int xOff = (i & 1) * 8; + int yOff = (i & 2) * 4; + + currentRows.Update(pixelBuffer, y + yOff); + pixelConverter.Convert(frame, x + xOff, y + yOff, ref currentRows); + + cb[i] = pixelConverter.Cb; + cr[i] = pixelConverter.Cr; + + prevDCY = this.WriteBlock( + QuantIndex.Luminance, + prevDCY, + ref pixelConverter.Y, + ref temp1, + ref temp2, + ref onStackLuminanceQuantTable, + ref unzig, + ref emitBufferBase); + } + + Block8x8F.Scale16X16To8X8(ref b, cb); + prevDCCb = this.WriteBlock( + QuantIndex.Chrominance, + prevDCCb, + ref b, + ref temp1, + ref temp2, + ref onStackChrominanceQuantTable, + ref unzig, + ref emitBufferBase); + + Block8x8F.Scale16X16To8X8(ref b, cr); + prevDCCr = this.WriteBlock( + QuantIndex.Chrominance, + prevDCCr, + ref b, + ref temp1, + ref temp2, + ref onStackChrominanceQuantTable, + ref unzig, + ref emitBufferBase); + } + } + } + + + /// + /// Encodes the image with no chroma, just luminance. + /// + /// The pixel format. + /// The pixel accessor providing access to the image pixels. + /// The token to monitor for cancellation. + /// The reference to the emit buffer. 
+ public void EncodeGrayscale(Image pixels, CancellationToken cancellationToken, ref byte emitBufferBase) + where TPixel : unmanaged, IPixel + { + // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) + // (Partially done with YCbCrForwardConverter) + Block8x8F temp1 = default; + Block8x8F temp2 = default; + + Block8x8F onStackLuminanceQuantTable = this.luminanceQuantTable; + + var unzig = ZigZag.CreateUnzigTable(); + + // ReSharper disable once InconsistentNaming + int prevDCY = 0; + + var pixelConverter = LuminanceForwardConverter.Create(); + ImageFrame frame = pixels.Frames.RootFrame; + Buffer2D pixelBuffer = frame.PixelBuffer; + RowOctet currentRows = default; + + for (int y = 0; y < pixels.Height; y += 8) + { + cancellationToken.ThrowIfCancellationRequested(); + currentRows.Update(pixelBuffer, y); + + for (int x = 0; x < pixels.Width; x += 8) + { + pixelConverter.Convert(frame, x, y, ref currentRows); + + prevDCY = this.WriteBlock( + QuantIndex.Luminance, + prevDCY, + ref pixelConverter.Y, + ref temp1, + ref temp2, + ref onStackLuminanceQuantTable, + ref unzig, + ref emitBufferBase); + } + } + } + + + /// + /// Writes a block of pixel data using the given quantization table, + /// returning the post-quantized DC value of the DCT-transformed block. + /// The block is in natural (not zig-zag) order. + /// + /// The quantization table index. + /// The previous DC value. + /// Source block + /// Temporal block to be used as FDCT Destination + /// Temporal block 2 + /// Quantization table + /// The 8x8 Unzig block. + /// The reference to the emit buffer. + /// The . 
+ private int WriteBlock( + QuantIndex index, + int prevDC, + ref Block8x8F src, + ref Block8x8F tempDest1, + ref Block8x8F tempDest2, + ref Block8x8F quant, + ref ZigZag unZig, + ref byte emitBufferBase) + { + FastFloatingPointDCT.TransformFDCT(ref src, ref tempDest1, ref tempDest2); + + Block8x8F.Quantize(ref tempDest1, ref tempDest2, ref quant, ref unZig); + + int dc = (int)tempDest2[0]; + + // Emit the DC delta. + this.EmitHuffRLE((HuffIndex)((2 * (int)index) + 0), 0, dc - prevDC, ref emitBufferBase); + + // Emit the AC components. + var h = (HuffIndex)((2 * (int)index) + 1); + int runLength = 0; + + for (int zig = 1; zig < Block8x8F.Size; zig++) + { + int ac = (int)tempDest2[zig]; + + if (ac == 0) + { + runLength++; + } + else + { + while (runLength > 15) + { + this.EmitHuff(h, 0xf0, ref emitBufferBase); + runLength -= 16; + } + + this.EmitHuffRLE(h, runLength, ac, ref emitBufferBase); + runLength = 0; + } + } + + if (runLength > 0) + { + this.EmitHuff(h, 0x00, ref emitBufferBase); + } + + return dc; + } + + /// + /// Emits the least significant count of bits of bits to the bit-stream. + /// The precondition is bits + /// + /// < 1<<nBits && nBits <= 16 + /// + /// . + /// + /// The packed bits. + /// The number of bits + /// The reference to the emitBuffer. + [MethodImpl(InliningOptions.ShortMethod)] + private void Emit(uint bits, uint count, ref byte emitBufferBase) + { + count += this.bitCount; + bits <<= (int)(32 - count); + bits |= this.accumulatedBits; + + // Only write if more than 8 bits. 
+ if (count >= 8) + { + // Track length + int len = 0; + while (count >= 8) + { + byte b = (byte)(bits >> 24); + Unsafe.Add(ref emitBufferBase, len++) = b; + if (b == byte.MaxValue) + { + Unsafe.Add(ref emitBufferBase, len++) = byte.MinValue; + } + + bits <<= 8; + count -= 8; + } + + if (len > 0) + { + this.outputStream.Write(this.emitBuffer, 0, len); + } + } + + this.accumulatedBits = bits; + this.bitCount = count; + } + + /// + /// Emits the given value with the given Huffman encoder. + /// + /// The index of the Huffman encoder + /// The value to encode. + /// The reference to the emit buffer. + [MethodImpl(InliningOptions.ShortMethod)] + private void EmitHuff(HuffIndex index, int value, ref byte emitBufferBase) + { + uint x = HuffmanLut.TheHuffmanLut[(int)index].Values[value]; + this.Emit(x & ((1 << 24) - 1), x >> 24, ref emitBufferBase); + } + + /// + /// Emits a run of runLength copies of value encoded with the given Huffman encoder. + /// + /// The index of the Huffman encoder + /// The number of copies to encode. + /// The value to encode. + /// The reference to the emit buffer. + [MethodImpl(InliningOptions.ShortMethod)] + private void EmitHuffRLE(HuffIndex index, int runLength, int value, ref byte emitBufferBase) + { + int a = value; + int b = value; + if (a < 0) + { + a = -value; + b = value - 1; + } + + uint bt; + if (a < 0x100) + { + bt = BitCountLut[a]; + } + else + { + bt = 8 + (uint)BitCountLut[a >> 8]; + } + + this.EmitHuff(index, (int)((uint)(runLength << 4) | bt), ref emitBufferBase); + if (bt > 0) + { + this.Emit((uint)b & (uint)((1 << ((int)bt)) - 1), bt, ref emitBufferBase); + } + } + + + /// + /// Initializes quantization table. + /// + /// The quantization index. + /// The scaling factor. + /// The quantization table. + private static void InitQuantizationTable(int i, int scale, ref Block8x8F quant) + { + DebugGuard.MustBeBetweenOrEqualTo(i, 0, 1, nameof(i)); + ReadOnlySpan unscaledQuant = (i == 0) ? 
UnscaledQuant_Luminance : UnscaledQuant_Chrominance; + + for (int j = 0; j < Block8x8F.Size; j++) + { + int x = unscaledQuant[j]; + x = ((x * scale) + 50) / 100; + if (x < 1) + { + x = 1; + } + + if (x > 255) + { + x = 255; + } + + quant[j] = x; + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index 79f0d3022..14cb87af3 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -183,23 +183,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg int qlty = Numerics.Clamp(this.quality ?? metadata.GetJpegMetadata().Quality, 1, 100); this.subsample ??= qlty >= 91 ? JpegSubsample.Ratio444 : JpegSubsample.Ratio420; - // Convert from a quality rating to a scaling factor. - int scale; - if (qlty < 50) - { - scale = 5000 / qlty; - } - else - { - scale = 200 - (qlty * 2); - } - - // Initialize the quantization tables. - InitQuantizationTable(0, scale, ref this.luminanceQuantTable); - if (componentCount > 1) - { - InitQuantizationTable(1, scale, ref this.chrominanceQuantTable); - } + YCbCrEncoder scanEncoder = new YCbCrEncoder(stream, componentCount, qlty); + this.luminanceQuantTable = scanEncoder.LuminanceQuantizationTable; + this.chrominanceQuantTable = scanEncoder.ChrominanceQuantizationTable; // Write the Start Of Image marker. this.WriteApplicationHeader(metadata); @@ -208,7 +194,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.WriteProfiles(metadata); // Write the quantization tables. - this.WriteDefineQuantizationTables(); + this.WriteDefineQuantizationTables(ref scanEncoder.LuminanceQuantizationTable, ref scanEncoder.ChrominanceQuantizationTable); // Write the image dimensions. this.WriteStartOfFrame(image.Width, image.Height, componentCount); @@ -669,7 +655,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg /// /// Writes the Define Quantization Marker and tables. 
/// - private void WriteDefineQuantizationTables() + private void WriteDefineQuantizationTables(ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable) { // Marker + quantization table lengths int markerlen = 2 + (QuantizationTableCount * (1 + Block8x8F.Size)); @@ -681,8 +667,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg byte[] dqt = new byte[dqtCount]; int offset = 0; - WriteDataToDqt(dqt, ref offset, QuantIndex.Luminance, ref this.luminanceQuantTable); - WriteDataToDqt(dqt, ref offset, QuantIndex.Chrominance, ref this.chrominanceQuantTable); + WriteDataToDqt(dqt, ref offset, QuantIndex.Luminance, ref luminanceQuantTable); + WriteDataToDqt(dqt, ref offset, QuantIndex.Chrominance, ref chrominanceQuantTable); this.outputStream.Write(dqt, 0, dqtCount); } From d91fc408bce53d853e01d55c14c1785b6769b350 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 07:47:51 +0300 Subject: [PATCH 11/99] Removed write buffer parameter injection --- .../Encoder/YCbCrEncoder{TPixel}.cs | 54 ++++++++----------- 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs index 2ef053eb1..6c8183244 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs @@ -141,7 +141,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. /// The reference to the emit buffer. - public void Encode444(Image pixels, CancellationToken cancellationToken, ref byte emitBufferBase) + public void Encode444(Image pixels, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) 
@@ -178,8 +178,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref temp1, ref temp2, ref onStackLuminanceQuantTable, - ref unzig, - ref emitBufferBase); + ref unzig); prevDCCb = this.WriteBlock( QuantIndex.Chrominance, @@ -188,8 +187,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref temp1, ref temp2, ref onStackChrominanceQuantTable, - ref unzig, - ref emitBufferBase); + ref unzig); prevDCCr = this.WriteBlock( QuantIndex.Chrominance, @@ -198,8 +196,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref temp1, ref temp2, ref onStackChrominanceQuantTable, - ref unzig, - ref emitBufferBase); + ref unzig); } } } @@ -212,7 +209,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. /// The reference to the emit buffer. - public void Encode420(Image pixels, CancellationToken cancellationToken, ref byte emitBufferBase) + public void Encode420(Image pixels, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) 
@@ -259,8 +256,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref temp1, ref temp2, ref onStackLuminanceQuantTable, - ref unzig, - ref emitBufferBase); + ref unzig); } Block8x8F.Scale16X16To8X8(ref b, cb); @@ -271,8 +267,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref temp1, ref temp2, ref onStackChrominanceQuantTable, - ref unzig, - ref emitBufferBase); + ref unzig); Block8x8F.Scale16X16To8X8(ref b, cr); prevDCCr = this.WriteBlock( @@ -282,8 +277,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref temp1, ref temp2, ref onStackChrominanceQuantTable, - ref unzig, - ref emitBufferBase); + ref unzig); } } } @@ -296,7 +290,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. /// The reference to the emit buffer. - public void EncodeGrayscale(Image pixels, CancellationToken cancellationToken, ref byte emitBufferBase) + public void EncodeGrayscale(Image pixels, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) @@ -332,8 +326,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref temp1, ref temp2, ref onStackLuminanceQuantTable, - ref unzig, - ref emitBufferBase); + ref unzig); } } } @@ -360,8 +353,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref Block8x8F tempDest1, ref Block8x8F tempDest2, ref Block8x8F quant, - ref ZigZag unZig, - ref byte emitBufferBase) + ref ZigZag unZig) { FastFloatingPointDCT.TransformFDCT(ref src, ref tempDest1, ref tempDest2); @@ -370,7 +362,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int dc = (int)tempDest2[0]; // Emit the DC delta. 
- this.EmitHuffRLE((HuffIndex)((2 * (int)index) + 0), 0, dc - prevDC, ref emitBufferBase); + this.EmitHuffRLE((HuffIndex)((2 * (int)index) + 0), 0, dc - prevDC); // Emit the AC components. var h = (HuffIndex)((2 * (int)index) + 1); @@ -388,18 +380,18 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { while (runLength > 15) { - this.EmitHuff(h, 0xf0, ref emitBufferBase); + this.EmitHuff(h, 0xf0); runLength -= 16; } - this.EmitHuffRLE(h, runLength, ac, ref emitBufferBase); + this.EmitHuffRLE(h, runLength, ac); runLength = 0; } } if (runLength > 0) { - this.EmitHuff(h, 0x00, ref emitBufferBase); + this.EmitHuff(h, 0x00); } return dc; @@ -417,7 +409,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The number of bits /// The reference to the emitBuffer. [MethodImpl(InliningOptions.ShortMethod)] - private void Emit(uint bits, uint count, ref byte emitBufferBase) + private void Emit(uint bits, uint count) { count += this.bitCount; bits <<= (int)(32 - count); @@ -431,10 +423,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder while (count >= 8) { byte b = (byte)(bits >> 24); - Unsafe.Add(ref emitBufferBase, len++) = b; + this.emitBuffer[len++] = b; if (b == byte.MaxValue) { - Unsafe.Add(ref emitBufferBase, len++) = byte.MinValue; + this.emitBuffer[len++] = byte.MinValue; } bits <<= 8; @@ -458,10 +450,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The value to encode. /// The reference to the emit buffer. [MethodImpl(InliningOptions.ShortMethod)] - private void EmitHuff(HuffIndex index, int value, ref byte emitBufferBase) + private void EmitHuff(HuffIndex index, int value) { uint x = HuffmanLut.TheHuffmanLut[(int)index].Values[value]; - this.Emit(x & ((1 << 24) - 1), x >> 24, ref emitBufferBase); + this.Emit(x & ((1 << 24) - 1), x >> 24); } /// @@ -472,7 +464,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The value to encode. /// The reference to the emit buffer. 
[MethodImpl(InliningOptions.ShortMethod)] - private void EmitHuffRLE(HuffIndex index, int runLength, int value, ref byte emitBufferBase) + private void EmitHuffRLE(HuffIndex index, int runLength, int value) { int a = value; int b = value; @@ -492,10 +484,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder bt = 8 + (uint)BitCountLut[a >> 8]; } - this.EmitHuff(index, (int)((uint)(runLength << 4) | bt), ref emitBufferBase); + this.EmitHuff(index, (int)((uint)(runLength << 4) | bt)); if (bt > 0) { - this.Emit((uint)b & (uint)((1 << ((int)bt)) - 1), bt, ref emitBufferBase); + this.Emit((uint)b & (uint)((1 << ((int)bt)) - 1), bt); } } From 66b5a8df67437cb66dad2756e2a598df2aad1385 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 08:07:47 +0300 Subject: [PATCH 12/99] [WIP] Moved SOS writing logic to separate class --- .../Encoder/YCbCrEncoder{TPixel}.cs | 29 ++++++++++-- .../Formats/Jpeg/JpegEncoderCore.cs | 44 ++++++++++--------- 2 files changed, 49 insertions(+), 24 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs index 6c8183244..a8411e218 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs @@ -141,7 +141,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. /// The reference to the emit buffer. - public void Encode444(Image pixels, CancellationToken cancellationToken) + private void Encode444(Image pixels, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) 
@@ -209,7 +209,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. /// The reference to the emit buffer. - public void Encode420(Image pixels, CancellationToken cancellationToken) + private void Encode420(Image pixels, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) @@ -290,7 +290,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. /// The reference to the emit buffer. - public void EncodeGrayscale(Image pixels, CancellationToken cancellationToken) + private void EncodeGrayscale(Image pixels, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) @@ -331,6 +331,29 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } + public void WriteStartOfScan(Image image, JpegColorType? colorType, JpegSubsample? subsample, CancellationToken cancellationToken) + where TPixel : unmanaged, IPixel + { + if (colorType == JpegColorType.Luminance) + { + this.EncodeGrayscale(image, cancellationToken); + } + else + { + switch (subsample) + { + case JpegSubsample.Ratio444: + this.Encode444(image, cancellationToken); + break; + case JpegSubsample.Ratio420: + this.Encode420(image, cancellationToken); + break; + } + } + + // Pad the last byte with 1's. 
+ this.Emit(0x7f, 7); + } /// /// Writes a block of pixel data using the given quantization table, diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index 14cb87af3..f1dd7f6bf 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -203,7 +203,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.WriteDefineHuffmanTables(componentCount); // Write the image data. - this.WriteStartOfScan(image, componentCount, cancellationToken); + this.WriteStartOfScan(scanEncoder, image, componentCount, cancellationToken); // Write the End Of Image marker. this.buffer[0] = JpegConstants.Markers.XFF; @@ -969,7 +969,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg /// The pixel accessor providing access to the image pixels. /// The number of components in a pixel. /// The token to monitor for cancellation. - private void WriteStartOfScan(Image image, int componentCount, CancellationToken cancellationToken) + private void WriteStartOfScan(YCbCrEncoder scanEncoder, Image image, int componentCount, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) 
@@ -1015,26 +1015,28 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.buffer[sosSize + 1] = 0x00; // Ah + Ah (Successive approximation bit position high + low) this.outputStream.Write(this.buffer, 0, sosSize + 2); - ref byte emitBufferBase = ref MemoryMarshal.GetReference(this.emitBuffer); - if (this.colorType == JpegColorType.Luminance) - { - this.EncodeGrayscale(image, cancellationToken, ref emitBufferBase); - } - else - { - switch (this.subsample) - { - case JpegSubsample.Ratio444: - this.Encode444(image, cancellationToken, ref emitBufferBase); - break; - case JpegSubsample.Ratio420: - this.Encode420(image, cancellationToken, ref emitBufferBase); - break; - } - } - // Pad the last byte with 1's. - this.Emit(0x7f, 7, ref emitBufferBase); + scanEncoder.WriteStartOfScan(image, this.colorType, this.subsample, cancellationToken); + //ref byte emitBufferBase = ref MemoryMarshal.GetReference(this.emitBuffer); + //if (this.colorType == JpegColorType.Luminance) + //{ + // scanEncoder.EncodeGrayscale(image, cancellationToken); + //} + //else + //{ + // switch (this.subsample) + // { + // case JpegSubsample.Ratio444: + // scanEncoder.Encode444(image, cancellationToken); + // break; + // case JpegSubsample.Ratio420: + // scanEncoder.Encode420(image, cancellationToken); + // break; + // } + //} + + //// Pad the last byte with 1's. 
+ //this.Emit(0x7f, 7, ref emitBufferBase); } /// From 0d7e4b13f2df0a33bb9e1b36aa7878cf1c82f4a9 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 08:27:41 +0300 Subject: [PATCH 13/99] Removed unrelevant code from JpegDecoderCore --- .../Formats/Jpeg/JpegEncoderCore.cs | 473 ------------------ 1 file changed, 473 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index f1dd7f6bf..019be629b 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -92,67 +92,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.colorType = options.ColorType; } - /// - /// Gets the counts the number of bits needed to hold an integer. - /// - // The C# compiler emits this as a compile-time constant embedded in the PE file. - // This is effectively compiled down to: return new ReadOnlySpan(&data, length) - // More details can be found: https://github.com/dotnet/roslyn/pull/24621 - private static ReadOnlySpan BitCountLut => new byte[] - { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, - }; - - /// - /// Gets the unscaled quantization tables in zig-zag order. 
Each - /// encoder copies and scales the tables according to its quality parameter. - /// The values are derived from section K.1 after converting from natural to - /// zig-zag order. - /// - // The C# compiler emits this as a compile-time constant embedded in the PE file. - // This is effectively compiled down to: return new ReadOnlySpan(&data, length) - // More details can be found: https://github.com/dotnet/roslyn/pull/24621 - private static ReadOnlySpan UnscaledQuant_Luminance => new byte[] - { - // Luminance. - 16, 11, 12, 14, 12, 10, 16, 14, 13, 14, 18, 17, 16, 19, 24, - 40, 26, 24, 22, 22, 24, 49, 35, 37, 29, 40, 58, 51, 61, 60, - 57, 51, 56, 55, 64, 72, 92, 78, 64, 68, 87, 69, 55, 56, 80, - 109, 81, 87, 95, 98, 103, 104, 103, 62, 77, 113, 121, 112, - 100, 120, 92, 101, 103, 99, - }; - - /// - /// Gets the unscaled quantization tables in zig-zag order. Each - /// encoder copies and scales the tables according to its quality parameter. - /// The values are derived from section K.1 after converting from natural to - /// zig-zag order. - /// - // The C# compiler emits this as a compile-time constant embedded in the PE file. - // This is effectively compiled down to: return new ReadOnlySpan(&data, length) - // More details can be found: https://github.com/dotnet/roslyn/pull/24621 - private static ReadOnlySpan UnscaledQuant_Chrominance => new byte[] - { - // Chrominance. - 17, 18, 18, 24, 21, 24, 47, 26, 26, 47, 99, 66, 56, 66, - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - }; - /// /// Encode writes the image to the jpeg baseline format with the given options. /// @@ -228,248 +167,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg } } - /// - /// Initializes quantization table. - /// - /// The quantization index. - /// The scaling factor. - /// The quantization table. 
- private static void InitQuantizationTable(int i, int scale, ref Block8x8F quant) - { - DebugGuard.MustBeBetweenOrEqualTo(i, 0, 1, nameof(i)); - ReadOnlySpan unscaledQuant = (i == 0) ? UnscaledQuant_Luminance : UnscaledQuant_Chrominance; - - for (int j = 0; j < Block8x8F.Size; j++) - { - int x = unscaledQuant[j]; - x = ((x * scale) + 50) / 100; - if (x < 1) - { - x = 1; - } - - if (x > 255) - { - x = 255; - } - - quant[j] = x; - } - } - - /// - /// Emits the least significant count of bits of bits to the bit-stream. - /// The precondition is bits - /// - /// < 1<<nBits && nBits <= 16 - /// - /// . - /// - /// The packed bits. - /// The number of bits - /// The reference to the emitBuffer. - [MethodImpl(InliningOptions.ShortMethod)] - private void Emit(uint bits, uint count, ref byte emitBufferBase) - { - count += this.bitCount; - bits <<= (int)(32 - count); - bits |= this.accumulatedBits; - - // Only write if more than 8 bits. - if (count >= 8) - { - // Track length - int len = 0; - while (count >= 8) - { - byte b = (byte)(bits >> 24); - Unsafe.Add(ref emitBufferBase, len++) = b; - if (b == byte.MaxValue) - { - Unsafe.Add(ref emitBufferBase, len++) = byte.MinValue; - } - - bits <<= 8; - count -= 8; - } - - if (len > 0) - { - this.outputStream.Write(this.emitBuffer, 0, len); - } - } - - this.accumulatedBits = bits; - this.bitCount = count; - } - - /// - /// Emits the given value with the given Huffman encoder. - /// - /// The index of the Huffman encoder - /// The value to encode. - /// The reference to the emit buffer. - [MethodImpl(InliningOptions.ShortMethod)] - private void EmitHuff(HuffIndex index, int value, ref byte emitBufferBase) - { - uint x = HuffmanLut.TheHuffmanLut[(int)index].Values[value]; - this.Emit(x & ((1 << 24) - 1), x >> 24, ref emitBufferBase); - } - - /// - /// Emits a run of runLength copies of value encoded with the given Huffman encoder. - /// - /// The index of the Huffman encoder - /// The number of copies to encode. 
- /// The value to encode. - /// The reference to the emit buffer. - [MethodImpl(InliningOptions.ShortMethod)] - private void EmitHuffRLE(HuffIndex index, int runLength, int value, ref byte emitBufferBase) - { - int a = value; - int b = value; - if (a < 0) - { - a = -value; - b = value - 1; - } - - uint bt; - if (a < 0x100) - { - bt = BitCountLut[a]; - } - else - { - bt = 8 + (uint)BitCountLut[a >> 8]; - } - - this.EmitHuff(index, (int)((uint)(runLength << 4) | bt), ref emitBufferBase); - if (bt > 0) - { - this.Emit((uint)b & (uint)((1 << ((int)bt)) - 1), bt, ref emitBufferBase); - } - } - - /// - /// Encodes the image with no subsampling. - /// - /// The pixel format. - /// The pixel accessor providing access to the image pixels. - /// The token to monitor for cancellation. - /// The reference to the emit buffer. - private void Encode444(Image pixels, CancellationToken cancellationToken, ref byte emitBufferBase) - where TPixel : unmanaged, IPixel - { - // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) 
- // (Partially done with YCbCrForwardConverter) - Block8x8F temp1 = default; - Block8x8F temp2 = default; - - Block8x8F onStackLuminanceQuantTable = this.luminanceQuantTable; - Block8x8F onStackChrominanceQuantTable = this.chrominanceQuantTable; - - var unzig = ZigZag.CreateUnzigTable(); - - // ReSharper disable once InconsistentNaming - int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; - - var pixelConverter = YCbCrForwardConverter.Create(); - ImageFrame frame = pixels.Frames.RootFrame; - Buffer2D pixelBuffer = frame.PixelBuffer; - RowOctet currentRows = default; - - for (int y = 0; y < pixels.Height; y += 8) - { - cancellationToken.ThrowIfCancellationRequested(); - currentRows.Update(pixelBuffer, y); - - for (int x = 0; x < pixels.Width; x += 8) - { - pixelConverter.Convert(frame, x, y, ref currentRows); - - prevDCY = this.WriteBlock( - QuantIndex.Luminance, - prevDCY, - ref pixelConverter.Y, - ref temp1, - ref temp2, - ref onStackLuminanceQuantTable, - ref unzig, - ref emitBufferBase); - - prevDCCb = this.WriteBlock( - QuantIndex.Chrominance, - prevDCCb, - ref pixelConverter.Cb, - ref temp1, - ref temp2, - ref onStackChrominanceQuantTable, - ref unzig, - ref emitBufferBase); - - prevDCCr = this.WriteBlock( - QuantIndex.Chrominance, - prevDCCr, - ref pixelConverter.Cr, - ref temp1, - ref temp2, - ref onStackChrominanceQuantTable, - ref unzig, - ref emitBufferBase); - } - } - } - - /// - /// Encodes the image with no chroma, just luminance. - /// - /// The pixel format. - /// The pixel accessor providing access to the image pixels. - /// The token to monitor for cancellation. - /// The reference to the emit buffer. - private void EncodeGrayscale(Image pixels, CancellationToken cancellationToken, ref byte emitBufferBase) - where TPixel : unmanaged, IPixel - { - // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) 
- // (Partially done with YCbCrForwardConverter) - Block8x8F temp1 = default; - Block8x8F temp2 = default; - - Block8x8F onStackLuminanceQuantTable = this.luminanceQuantTable; - - var unzig = ZigZag.CreateUnzigTable(); - - // ReSharper disable once InconsistentNaming - int prevDCY = 0; - - var pixelConverter = LuminanceForwardConverter.Create(); - ImageFrame frame = pixels.Frames.RootFrame; - Buffer2D pixelBuffer = frame.PixelBuffer; - RowOctet currentRows = default; - - for (int y = 0; y < pixels.Height; y += 8) - { - cancellationToken.ThrowIfCancellationRequested(); - currentRows.Update(pixelBuffer, y); - - for (int x = 0; x < pixels.Width; x += 8) - { - pixelConverter.Convert(frame, x, y, ref currentRows); - - prevDCY = this.WriteBlock( - QuantIndex.Luminance, - prevDCY, - ref pixelConverter.Y, - ref temp1, - ref temp2, - ref onStackLuminanceQuantTable, - ref unzig, - ref emitBufferBase); - } - } - } - /// /// Writes the application header containing the JFIF identifier plus extra data. /// @@ -519,72 +216,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.outputStream.Write(this.buffer, 0, 20); } - /// - /// Writes a block of pixel data using the given quantization table, - /// returning the post-quantized DC value of the DCT-transformed block. - /// The block is in natural (not zig-zag) order. - /// - /// The quantization table index. - /// The previous DC value. - /// Source block - /// Temporal block to be used as FDCT Destination - /// Temporal block 2 - /// Quantization table - /// The 8x8 Unzig block. - /// The reference to the emit buffer. - /// The . 
- private int WriteBlock( - QuantIndex index, - int prevDC, - ref Block8x8F src, - ref Block8x8F tempDest1, - ref Block8x8F tempDest2, - ref Block8x8F quant, - ref ZigZag unZig, - ref byte emitBufferBase) - { - FastFloatingPointDCT.TransformFDCT(ref src, ref tempDest1, ref tempDest2); - - Block8x8F.Quantize(ref tempDest1, ref tempDest2, ref quant, ref unZig); - - int dc = (int)tempDest2[0]; - - // Emit the DC delta. - this.EmitHuffRLE((HuffIndex)((2 * (int)index) + 0), 0, dc - prevDC, ref emitBufferBase); - - // Emit the AC components. - var h = (HuffIndex)((2 * (int)index) + 1); - int runLength = 0; - - for (int zig = 1; zig < Block8x8F.Size; zig++) - { - int ac = (int)tempDest2[zig]; - - if (ac == 0) - { - runLength++; - } - else - { - while (runLength > 15) - { - this.EmitHuff(h, 0xf0, ref emitBufferBase); - runLength -= 16; - } - - this.EmitHuffRLE(h, runLength, ac, ref emitBufferBase); - runLength = 0; - } - } - - if (runLength > 0) - { - this.EmitHuff(h, 0x00, ref emitBufferBase); - } - - return dc; - } - /// /// Writes the Define Huffman Table marker and tables. /// @@ -1017,110 +648,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg scanEncoder.WriteStartOfScan(image, this.colorType, this.subsample, cancellationToken); - //ref byte emitBufferBase = ref MemoryMarshal.GetReference(this.emitBuffer); - //if (this.colorType == JpegColorType.Luminance) - //{ - // scanEncoder.EncodeGrayscale(image, cancellationToken); - //} - //else - //{ - // switch (this.subsample) - // { - // case JpegSubsample.Ratio444: - // scanEncoder.Encode444(image, cancellationToken); - // break; - // case JpegSubsample.Ratio420: - // scanEncoder.Encode420(image, cancellationToken); - // break; - // } - //} - - //// Pad the last byte with 1's. - //this.Emit(0x7f, 7, ref emitBufferBase); - } - - /// - /// Encodes the image with subsampling. The Cb and Cr components are each subsampled - /// at a factor of 2 both horizontally and vertically. - /// - /// The pixel format. 
- /// The pixel accessor providing access to the image pixels. - /// The token to monitor for cancellation. - /// The reference to the emit buffer. - private void Encode420(Image pixels, CancellationToken cancellationToken, ref byte emitBufferBase) - where TPixel : unmanaged, IPixel - { - // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) - Block8x8F b = default; - Span cb = stackalloc Block8x8F[4]; - Span cr = stackalloc Block8x8F[4]; - - Block8x8F temp1 = default; - Block8x8F temp2 = default; - - Block8x8F onStackLuminanceQuantTable = this.luminanceQuantTable; - Block8x8F onStackChrominanceQuantTable = this.chrominanceQuantTable; - - var unzig = ZigZag.CreateUnzigTable(); - - var pixelConverter = YCbCrForwardConverter.Create(); - - // ReSharper disable once InconsistentNaming - int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; - ImageFrame frame = pixels.Frames.RootFrame; - Buffer2D pixelBuffer = frame.PixelBuffer; - RowOctet currentRows = default; - - for (int y = 0; y < pixels.Height; y += 16) - { - cancellationToken.ThrowIfCancellationRequested(); - for (int x = 0; x < pixels.Width; x += 16) - { - for (int i = 0; i < 4; i++) - { - int xOff = (i & 1) * 8; - int yOff = (i & 2) * 4; - - currentRows.Update(pixelBuffer, y + yOff); - pixelConverter.Convert(frame, x + xOff, y + yOff, ref currentRows); - - cb[i] = pixelConverter.Cb; - cr[i] = pixelConverter.Cr; - - prevDCY = this.WriteBlock( - QuantIndex.Luminance, - prevDCY, - ref pixelConverter.Y, - ref temp1, - ref temp2, - ref onStackLuminanceQuantTable, - ref unzig, - ref emitBufferBase); - } - - Block8x8F.Scale16X16To8X8(ref b, cb); - prevDCCb = this.WriteBlock( - QuantIndex.Chrominance, - prevDCCb, - ref b, - ref temp1, - ref temp2, - ref onStackChrominanceQuantTable, - ref unzig, - ref emitBufferBase); - - Block8x8F.Scale16X16To8X8(ref b, cr); - prevDCCr = this.WriteBlock( - QuantIndex.Chrominance, - prevDCCr, - ref b, - ref temp1, - ref 
temp2, - ref onStackChrominanceQuantTable, - ref unzig, - ref emitBufferBase); - } - } } /// From d593479a8d692e3bdb593c658acbce4ce33f9d29 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 08:34:26 +0300 Subject: [PATCH 14/99] Removed remaining unrelevant code from JpegEncoderCore --- .../Formats/Jpeg/JpegEncoderCore.cs | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index 019be629b..2625d490c 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -36,11 +36,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg /// private readonly byte[] buffer = new byte[20]; - /// - /// A buffer for reducing the number of stream writes when emitting Huffman tables. 64 seems to be enough. - /// - private readonly byte[] emitBuffer = new byte[64]; - /// /// Gets or sets the subsampling method to use. /// @@ -56,26 +51,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg /// private readonly JpegColorType? colorType; - /// - /// The accumulated bits to write to the stream. - /// - private uint accumulatedBits; - - /// - /// The accumulated bit count. - /// - private uint bitCount; - - /// - /// The scaled chrominance table, in zig-zag order. - /// - private Block8x8F chrominanceQuantTable; - - /// - /// The scaled luminance table, in zig-zag order. - /// - private Block8x8F luminanceQuantTable; - /// /// The output stream. All attempted writes after the first error become no-ops. /// @@ -123,8 +98,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.subsample ??= qlty >= 91 ? JpegSubsample.Ratio444 : JpegSubsample.Ratio420; YCbCrEncoder scanEncoder = new YCbCrEncoder(stream, componentCount, qlty); - this.luminanceQuantTable = scanEncoder.LuminanceQuantizationTable; - this.chrominanceQuantTable = scanEncoder.ChrominanceQuantizationTable; // Write the Start Of Image marker. 
this.WriteApplicationHeader(metadata); From 296ee10c91f008c2627fe96b0e800e9eda7fffe9 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 11:43:30 +0300 Subject: [PATCH 15/99] Optimized jpeg encoder stream Write calls but a lot -> huge performance gain --- .../Encoder/YCbCrEncoder{TPixel}.cs | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs index a8411e218..7412b4d91 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs @@ -14,10 +14,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { internal class YCbCrEncoder { + private const int EmitBufferSizeInBytes = 1024; + /// /// A buffer for reducing the number of stream writes when emitting Huffman tables. 64 seems to be enough. /// - private byte[] emitBuffer = new byte[64]; + private byte[] emitBuffer = new byte[EmitBufferSizeInBytes]; /// /// The accumulated bits to write to the stream. @@ -353,6 +355,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // Pad the last byte with 1's. this.Emit(0x7f, 7); + this.outputStream.Write(this.emitBuffer, 0, this.emitLen); } /// @@ -420,8 +423,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder return dc; } + private int emitLen = 0; + /// - /// Emits the least significant count of bits of bits to the bit-stream. + /// Emits the least significant count of bits to the stream write buffer. 
/// The precondition is bits /// /// < 1<<nBits && nBits <= 16 @@ -442,23 +447,28 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder if (count >= 8) { // Track length - int len = 0; while (count >= 8) { byte b = (byte)(bits >> 24); - this.emitBuffer[len++] = b; + this.emitBuffer[this.emitLen++] = b; if (b == byte.MaxValue) { - this.emitBuffer[len++] = byte.MinValue; + this.emitBuffer[this.emitLen++] = byte.MinValue; } bits <<= 8; count -= 8; } - if (len > 0) + // This can emit 4 times of: + // 1 byte guaranteed + // 1 extra byte.MinValue byte if previous one was byte.MaxValue + // Thus writing (1 + 1) * 4 = 8 bytes max + // So we must check if emit buffer has extra 8 bytes, if not - call stream.Write + if (this.emitLen > EmitBufferSizeInBytes - 8) { - this.outputStream.Write(this.emitBuffer, 0, len); + this.outputStream.Write(this.emitBuffer, 0, this.emitLen); + this.emitLen = 0; } } From 56822d1bcc1f19c58601bc3e1ae541d8203e658d Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 11:46:53 +0300 Subject: [PATCH 16/99] Removed obsolete parameter config from various methods --- .../Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs index 7412b4d91..d5bf797bb 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs @@ -142,7 +142,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The pixel format. /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. - /// The reference to the emit buffer. 
private void Encode444(Image pixels, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { @@ -210,7 +209,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The pixel format. /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. - /// The reference to the emit buffer. private void Encode420(Image pixels, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { @@ -291,7 +289,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The pixel format. /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. - /// The reference to the emit buffer. private void EncodeGrayscale(Image pixels, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { @@ -370,7 +367,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// Temporal block 2 /// Quantization table /// The 8x8 Unzig block. - /// The reference to the emit buffer. /// The . private int WriteBlock( QuantIndex index, @@ -435,7 +431,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// The packed bits. /// The number of bits - /// The reference to the emitBuffer. [MethodImpl(InliningOptions.ShortMethod)] private void Emit(uint bits, uint count) { @@ -481,7 +476,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// The index of the Huffman encoder /// The value to encode. - /// The reference to the emit buffer. [MethodImpl(InliningOptions.ShortMethod)] private void EmitHuff(HuffIndex index, int value) { @@ -495,7 +489,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The index of the Huffman encoder /// The number of copies to encode. /// The value to encode. - /// The reference to the emit buffer. 
[MethodImpl(InliningOptions.ShortMethod)] private void EmitHuffRLE(HuffIndex index, int runLength, int value) { From 690e80cf69800038debc08856e2bfe4a3254a60f Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 12:29:11 +0300 Subject: [PATCH 17/99] YCbCrEncoder now has builtin temporal 8x8F blocks for internal calculations --- .../Encoder/YCbCrEncoder{TPixel}.cs | 38 +++++-------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs index d5bf797bb..5b63d0588 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs @@ -41,6 +41,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private Block8x8F luminanceQuantTable; + private Block8x8F temporalBlock1; + private Block8x8F temporalBlock2; + /// /// The output stream. All attempted writes after the first error become no-ops. /// @@ -145,11 +148,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder private void Encode444(Image pixels, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) 
- // (Partially done with YCbCrForwardConverter) - Block8x8F temp1 = default; - Block8x8F temp2 = default; - Block8x8F onStackLuminanceQuantTable = this.luminanceQuantTable; Block8x8F onStackChrominanceQuantTable = this.chrominanceQuantTable; @@ -176,8 +174,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder QuantIndex.Luminance, prevDCY, ref pixelConverter.Y, - ref temp1, - ref temp2, ref onStackLuminanceQuantTable, ref unzig); @@ -185,8 +181,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder QuantIndex.Chrominance, prevDCCb, ref pixelConverter.Cb, - ref temp1, - ref temp2, ref onStackChrominanceQuantTable, ref unzig); @@ -194,8 +188,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder QuantIndex.Chrominance, prevDCCr, ref pixelConverter.Cr, - ref temp1, - ref temp2, ref onStackChrominanceQuantTable, ref unzig); } @@ -217,9 +209,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder Span cb = stackalloc Block8x8F[4]; Span cr = stackalloc Block8x8F[4]; - Block8x8F temp1 = default; - Block8x8F temp2 = default; - Block8x8F onStackLuminanceQuantTable = this.luminanceQuantTable; Block8x8F onStackChrominanceQuantTable = this.chrominanceQuantTable; @@ -253,8 +242,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder QuantIndex.Luminance, prevDCY, ref pixelConverter.Y, - ref temp1, - ref temp2, ref onStackLuminanceQuantTable, ref unzig); } @@ -264,8 +251,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder QuantIndex.Chrominance, prevDCCb, ref b, - ref temp1, - ref temp2, ref onStackChrominanceQuantTable, ref unzig); @@ -274,8 +259,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder QuantIndex.Chrominance, prevDCCr, ref b, - ref temp1, - ref temp2, ref onStackChrominanceQuantTable, ref unzig); } @@ -322,8 +305,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder QuantIndex.Luminance, prevDCY, ref pixelConverter.Y, - ref temp1, - ref temp2, ref 
onStackLuminanceQuantTable, ref unzig); } @@ -372,16 +353,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder QuantIndex index, int prevDC, ref Block8x8F src, - ref Block8x8F tempDest1, - ref Block8x8F tempDest2, ref Block8x8F quant, ref ZigZag unZig) { - FastFloatingPointDCT.TransformFDCT(ref src, ref tempDest1, ref tempDest2); + ref Block8x8F refTemp1 = ref this.temporalBlock1; + ref Block8x8F refTemp2 = ref this.temporalBlock2; + + FastFloatingPointDCT.TransformFDCT(ref src, ref refTemp1, ref refTemp2); - Block8x8F.Quantize(ref tempDest1, ref tempDest2, ref quant, ref unZig); + Block8x8F.Quantize(ref refTemp1, ref refTemp2, ref quant, ref unZig); - int dc = (int)tempDest2[0]; + int dc = (int)refTemp2[0]; // Emit the DC delta. this.EmitHuffRLE((HuffIndex)((2 * (int)index) + 0), 0, dc - prevDC); @@ -392,7 +374,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int zig = 1; zig < Block8x8F.Size; zig++) { - int ac = (int)tempDest2[zig]; + int ac = (int)refTemp2[zig]; if (ac == 0) { From b3a993806c64331c633ce154b53590a4f48e8bf6 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 13:06:51 +0300 Subject: [PATCH 18/99] Updated & fixed xml documentation --- .../Encoder/YCbCrEncoder{TPixel}.cs | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs index 5b63d0588..a10f40b09 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs @@ -13,21 +13,34 @@ using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { internal class YCbCrEncoder + where TPixel : unmanaged, IPixel { + /// + /// Number of bytes cached before being written to target stream via Stream.Write(byte[], offest, count). 
+ /// + /// + /// This is subject to change, 1024 seems to be the best value in terms of performance. + /// expects it to be at least 8 (see comments in method body). + /// private const int EmitBufferSizeInBytes = 1024; /// - /// A buffer for reducing the number of stream writes when emitting Huffman tables. 64 seems to be enough. + /// A buffer for reducing the number of stream writes when emitting Huffman tables. /// private byte[] emitBuffer = new byte[EmitBufferSizeInBytes]; /// - /// The accumulated bits to write to the stream. + /// Number of filled bytes in buffer + /// + private int emitLen = 0; + + /// + /// Emmited bits 'micro buffer' before being transfered to the . /// private uint accumulatedBits; /// - /// The accumulated bit count. + /// Number of jagged bits stored in /// private uint bitCount; @@ -44,10 +57,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder private Block8x8F temporalBlock1; private Block8x8F temporalBlock2; + private ImageFrame source; + /// /// The output stream. All attempted writes after the first error become no-ops. /// - private Stream outputStream; + private Stream target; /// /// Gets the counts the number of bits needed to hold an integer. @@ -118,7 +133,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public YCbCrEncoder(Stream outputStream, int componentCount, int quality) { - this.outputStream = outputStream; + this.target = outputStream; // Convert from a quality rating to a scaling factor. int scale; @@ -333,7 +348,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // Pad the last byte with 1's. this.Emit(0x7f, 7); - this.outputStream.Write(this.emitBuffer, 0, this.emitLen); + this.target.Write(this.emitBuffer, 0, this.emitLen); } /// @@ -344,8 +359,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The quantization table index. /// The previous DC value. 
/// Source block - /// Temporal block to be used as FDCT Destination - /// Temporal block 2 /// Quantization table /// The 8x8 Unzig block. /// The . @@ -401,8 +414,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder return dc; } - private int emitLen = 0; - /// /// Emits the least significant count of bits to the stream write buffer. /// The precondition is bits @@ -444,7 +455,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // So we must check if emit buffer has extra 8 bytes, if not - call stream.Write if (this.emitLen > EmitBufferSizeInBytes - 8) { - this.outputStream.Write(this.emitBuffer, 0, this.emitLen); + this.target.Write(this.emitBuffer, 0, this.emitLen); this.emitLen = 0; } } From 4e73471d96f1ed4c6078f75bc4d1b4f14a342ed7 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 13:09:08 +0300 Subject: [PATCH 19/99] Small QoL fixes --- .../Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs index a10f40b09..051acf0e8 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs @@ -2,18 +2,15 @@ // Licensed under the Apache License, Version 2.0. using System; -using System.Collections.Generic; using System.IO; using System.Runtime.CompilerServices; -using System.Text; using System.Threading; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { - internal class YCbCrEncoder - where TPixel : unmanaged, IPixel + internal class YCbCrEncoder { /// /// Number of bytes cached before being written to target stream via Stream.Write(byte[], offest, count). 
@@ -57,8 +54,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder private Block8x8F temporalBlock1; private Block8x8F temporalBlock2; - private ImageFrame source; - /// /// The output stream. All attempted writes after the first error become no-ops. /// @@ -290,11 +285,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder private void EncodeGrayscale(Image pixels, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) - // (Partially done with YCbCrForwardConverter) - Block8x8F temp1 = default; - Block8x8F temp2 = default; - Block8x8F onStackLuminanceQuantTable = this.luminanceQuantTable; var unzig = ZigZag.CreateUnzigTable(); From 368f89e4509a053a35c5b52d9fc679ba6163c10a Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 16:11:17 +0300 Subject: [PATCH 20/99] Moved quantization table initialization logic to JpegEncoderCore --- .../Encoder/YCbCrEncoder{TPixel}.cs | 146 +++--------------- .../Formats/Jpeg/JpegEncoderCore.cs | 110 ++++++++++++- 2 files changed, 123 insertions(+), 133 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs index 051acf0e8..db2a3c354 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs @@ -41,16 +41,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private uint bitCount; - /// - /// The scaled chrominance table, in zig-zag order. - /// - private Block8x8F chrominanceQuantTable; - - /// - /// The scaled luminance table, in zig-zag order. 
- /// - private Block8x8F luminanceQuantTable; - private Block8x8F temporalBlock1; private Block8x8F temporalBlock2; @@ -82,71 +72,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder 8, 8, 8, }; - /// - /// Gets the unscaled quantization tables in zig-zag order. Each - /// encoder copies and scales the tables according to its quality parameter. - /// The values are derived from section K.1 after converting from natural to - /// zig-zag order. - /// - // The C# compiler emits this as a compile-time constant embedded in the PE file. - // This is effectively compiled down to: return new ReadOnlySpan(&data, length) - // More details can be found: https://github.com/dotnet/roslyn/pull/24621 - private static ReadOnlySpan UnscaledQuant_Luminance => new byte[] - { - // Luminance. - 16, 11, 12, 14, 12, 10, 16, 14, 13, 14, 18, 17, 16, 19, 24, - 40, 26, 24, 22, 22, 24, 49, 35, 37, 29, 40, 58, 51, 61, 60, - 57, 51, 56, 55, 64, 72, 92, 78, 64, 68, 87, 69, 55, 56, 80, - 109, 81, 87, 95, 98, 103, 104, 103, 62, 77, 113, 121, 112, - 100, 120, 92, 101, 103, 99, - }; - - /// - /// Gets the unscaled quantization tables in zig-zag order. Each - /// encoder copies and scales the tables according to its quality parameter. - /// The values are derived from section K.1 after converting from natural to - /// zig-zag order. - /// - // The C# compiler emits this as a compile-time constant embedded in the PE file. - // This is effectively compiled down to: return new ReadOnlySpan(&data, length) - // More details can be found: https://github.com/dotnet/roslyn/pull/24621 - private static ReadOnlySpan UnscaledQuant_Chrominance => new byte[] - { - // Chrominance. 
- 17, 18, 18, 24, 21, 24, 47, 26, 26, 47, 99, 66, 56, 66, - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - }; - - - public ref Block8x8F ChrominanceQuantizationTable => ref this.chrominanceQuantTable; - - public ref Block8x8F LuminanceQuantizationTable => ref this.luminanceQuantTable; - - - public YCbCrEncoder(Stream outputStream, int componentCount, int quality) + public YCbCrEncoder(Stream outputStream) { this.target = outputStream; - - // Convert from a quality rating to a scaling factor. - int scale; - if (quality < 50) - { - scale = 5000 / quality; - } - else - { - scale = 200 - (quality * 2); - } - - // Initialize the quantization tables. - InitQuantizationTable(0, scale, ref this.luminanceQuantTable); - if (componentCount > 1) - { - InitQuantizationTable(1, scale, ref this.chrominanceQuantTable); - } } /// @@ -155,12 +83,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The pixel format. /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. 
- private void Encode444(Image pixels, CancellationToken cancellationToken) + private void Encode444(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - Block8x8F onStackLuminanceQuantTable = this.luminanceQuantTable; - Block8x8F onStackChrominanceQuantTable = this.chrominanceQuantTable; - var unzig = ZigZag.CreateUnzigTable(); // ReSharper disable once InconsistentNaming @@ -184,21 +109,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder QuantIndex.Luminance, prevDCY, ref pixelConverter.Y, - ref onStackLuminanceQuantTable, + ref luminanceQuantTable, ref unzig); prevDCCb = this.WriteBlock( QuantIndex.Chrominance, prevDCCb, ref pixelConverter.Cb, - ref onStackChrominanceQuantTable, + ref chrominanceQuantTable, ref unzig); prevDCCr = this.WriteBlock( QuantIndex.Chrominance, prevDCCr, ref pixelConverter.Cr, - ref onStackChrominanceQuantTable, + ref chrominanceQuantTable, ref unzig); } } @@ -211,7 +136,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The pixel format. /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. - private void Encode420(Image pixels, CancellationToken cancellationToken) + private void Encode420(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) 
@@ -219,9 +144,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder Span cb = stackalloc Block8x8F[4]; Span cr = stackalloc Block8x8F[4]; - Block8x8F onStackLuminanceQuantTable = this.luminanceQuantTable; - Block8x8F onStackChrominanceQuantTable = this.chrominanceQuantTable; - var unzig = ZigZag.CreateUnzigTable(); var pixelConverter = YCbCrForwardConverter.Create(); @@ -252,7 +174,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder QuantIndex.Luminance, prevDCY, ref pixelConverter.Y, - ref onStackLuminanceQuantTable, + ref luminanceQuantTable, ref unzig); } @@ -261,7 +183,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder QuantIndex.Chrominance, prevDCCb, ref b, - ref onStackChrominanceQuantTable, + ref chrominanceQuantTable, ref unzig); Block8x8F.Scale16X16To8X8(ref b, cr); @@ -269,7 +191,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder QuantIndex.Chrominance, prevDCCr, ref b, - ref onStackChrominanceQuantTable, + ref chrominanceQuantTable, ref unzig); } } @@ -282,11 +204,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The pixel format. /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. - private void EncodeGrayscale(Image pixels, CancellationToken cancellationToken) + private void EncodeGrayscale(Image pixels, ref Block8x8F luminanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - Block8x8F onStackLuminanceQuantTable = this.luminanceQuantTable; - var unzig = ZigZag.CreateUnzigTable(); // ReSharper disable once InconsistentNaming @@ -310,28 +230,34 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder QuantIndex.Luminance, prevDCY, ref pixelConverter.Y, - ref onStackLuminanceQuantTable, + ref luminanceQuantTable, ref unzig); } } } - public void WriteStartOfScan(Image image, JpegColorType? colorType, JpegSubsample? 
subsample, CancellationToken cancellationToken) + public void WriteStartOfScan( + Image image, + JpegColorType? colorType, + JpegSubsample? subsample, + ref Block8x8F luminanceQuantTable, + ref Block8x8F chrominanceTable, + CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { if (colorType == JpegColorType.Luminance) { - this.EncodeGrayscale(image, cancellationToken); + this.EncodeGrayscale(image, ref luminanceQuantTable, cancellationToken); } else { switch (subsample) { case JpegSubsample.Ratio444: - this.Encode444(image, cancellationToken); + this.Encode444(image, ref luminanceQuantTable, ref chrominanceTable, cancellationToken); break; case JpegSubsample.Ratio420: - this.Encode420(image, cancellationToken); + this.Encode420(image, ref luminanceQuantTable, ref chrominanceTable, cancellationToken); break; } } @@ -499,35 +425,5 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder this.Emit((uint)b & (uint)((1 << ((int)bt)) - 1), bt); } } - - - /// - /// Initializes quantization table. - /// - /// The quantization index. - /// The scaling factor. - /// The quantization table. - private static void InitQuantizationTable(int i, int scale, ref Block8x8F quant) - { - DebugGuard.MustBeBetweenOrEqualTo(i, 0, 1, nameof(i)); - ReadOnlySpan unscaledQuant = (i == 0) ? UnscaledQuant_Luminance : UnscaledQuant_Chrominance; - - for (int j = 0; j < Block8x8F.Size; j++) - { - int x = unscaledQuant[j]; - x = ((x * scale) + 50) / 100; - if (x < 1) - { - x = 1; - } - - if (x > 255) - { - x = 255; - } - - quant[j] = x; - } - } } } diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index 2625d490c..6b58ef483 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -31,6 +31,45 @@ namespace SixLabors.ImageSharp.Formats.Jpeg /// private const int QuantizationTableCount = 2; + /// + /// Gets the unscaled quantization tables in zig-zag order. 
Each + /// encoder copies and scales the tables according to its quality parameter. + /// The values are derived from section K.1 after converting from natural to + /// zig-zag order. + /// + // The C# compiler emits this as a compile-time constant embedded in the PE file. + // This is effectively compiled down to: return new ReadOnlySpan(&data, length) + // More details can be found: https://github.com/dotnet/roslyn/pull/24621 + private static ReadOnlySpan UnscaledQuant_Luminance => new byte[] + { + // Luminance. + 16, 11, 12, 14, 12, 10, 16, 14, 13, 14, 18, 17, 16, 19, 24, + 40, 26, 24, 22, 22, 24, 49, 35, 37, 29, 40, 58, 51, 61, 60, + 57, 51, 56, 55, 64, 72, 92, 78, 64, 68, 87, 69, 55, 56, 80, + 109, 81, 87, 95, 98, 103, 104, 103, 62, 77, 113, 121, 112, + 100, 120, 92, 101, 103, 99, + }; + + /// + /// Gets the unscaled quantization tables in zig-zag order. Each + /// encoder copies and scales the tables according to its quality parameter. + /// The values are derived from section K.1 after converting from natural to + /// zig-zag order. + /// + // The C# compiler emits this as a compile-time constant embedded in the PE file. + // This is effectively compiled down to: return new ReadOnlySpan(&data, length) + // More details can be found: https://github.com/dotnet/roslyn/pull/24621 + private static ReadOnlySpan UnscaledQuant_Chrominance => new byte[] + { + // Chrominance. + 17, 18, 18, 24, 21, 24, 47, 26, 26, 47, 99, 66, 56, 66, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + }; + + /// /// A scratch buffer to reduce allocations. /// @@ -97,7 +136,27 @@ namespace SixLabors.ImageSharp.Formats.Jpeg int qlty = Numerics.Clamp(this.quality ?? metadata.GetJpegMetadata().Quality, 1, 100); this.subsample ??= qlty >= 91 ? 
JpegSubsample.Ratio444 : JpegSubsample.Ratio420; - YCbCrEncoder scanEncoder = new YCbCrEncoder(stream, componentCount, qlty); + // Convert from a quality rating to a scaling factor. + int scale; + if (qlty < 50) + { + scale = 5000 / qlty; + } + else + { + scale = 200 - (qlty * 2); + } + + // Initialize the quantization tables. + // TODO: This looks ugly, should we write chrominance table for luminance-only images? + // If not - this can code can be simplified + Block8x8F luminanceQuantTable = default; + Block8x8F chrominanceQuantTable = default; + InitQuantizationTable(0, scale, ref luminanceQuantTable); + if (componentCount > 1) + { + InitQuantizationTable(1, scale, ref chrominanceQuantTable); + } // Write the Start Of Image marker. this.WriteApplicationHeader(metadata); @@ -106,7 +165,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.WriteProfiles(metadata); // Write the quantization tables. - this.WriteDefineQuantizationTables(ref scanEncoder.LuminanceQuantizationTable, ref scanEncoder.ChrominanceQuantizationTable); + this.WriteDefineQuantizationTables(ref luminanceQuantTable, ref chrominanceQuantTable); // Write the image dimensions. this.WriteStartOfFrame(image.Width, image.Height, componentCount); @@ -114,8 +173,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg // Write the Huffman tables. this.WriteDefineHuffmanTables(componentCount); - // Write the image data. - this.WriteStartOfScan(scanEncoder, image, componentCount, cancellationToken); + // Write the scan header. + this.WriteStartOfScan(image, componentCount, cancellationToken); + + // Write the scan compressed data. + new YCbCrEncoder(stream).WriteStartOfScan( + image, + this.colorType, + this.subsample, + ref luminanceQuantTable, + ref chrominanceQuantTable, + cancellationToken); // Write the End Of Image marker. this.buffer[0] = JpegConstants.Markers.XFF; @@ -573,7 +641,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg /// The pixel accessor providing access to the image pixels. 
/// The number of components in a pixel. /// The token to monitor for cancellation. - private void WriteStartOfScan(YCbCrEncoder scanEncoder, Image image, int componentCount, CancellationToken cancellationToken) + private void WriteStartOfScan(Image image, int componentCount, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) @@ -618,9 +686,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.buffer[sosSize] = 0x3f; // Se - End of spectral selection. this.buffer[sosSize + 1] = 0x00; // Ah + Ah (Successive approximation bit position high + low) this.outputStream.Write(this.buffer, 0, sosSize + 2); - - - scanEncoder.WriteStartOfScan(image, this.colorType, this.subsample, cancellationToken); } /// @@ -637,5 +702,34 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.buffer[3] = (byte)(length & 0xff); this.outputStream.Write(this.buffer, 0, 4); } + + /// + /// Initializes quantization table. + /// + /// The quantization index. + /// The scaling factor. + /// The quantization table. + private static void InitQuantizationTable(int i, int scale, ref Block8x8F quant) + { + DebugGuard.MustBeBetweenOrEqualTo(i, 0, 1, nameof(i)); + ReadOnlySpan unscaledQuant = (i == 0) ? 
UnscaledQuant_Luminance : UnscaledQuant_Chrominance; + + for (int j = 0; j < Block8x8F.Size; j++) + { + int x = unscaledQuant[j]; + x = ((x * scale) + 50) / 100; + if (x < 1) + { + x = 1; + } + + if (x > 255) + { + x = 255; + } + + quant[j] = x; + } + } } } From 9d7adb6bf795a2941057ea20c335e9a747861078 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 16:14:38 +0300 Subject: [PATCH 21/99] Fixed comments --- .../Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs index db2a3c354..8256348a8 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs @@ -17,7 +17,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// /// This is subject to change, 1024 seems to be the best value in terms of performance. - /// expects it to be at least 8 (see comments in method body). + /// expects it to be at least 8 (see comments in method body). /// private const int EmitBufferSizeInBytes = 1024; @@ -32,7 +32,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder private int emitLen = 0; /// - /// Emmited bits 'micro buffer' before being transfered to the . + /// Emmited bits 'micro buffer' before being transfered to the . /// private uint accumulatedBits; @@ -82,6 +82,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// The pixel format. /// The pixel accessor providing access to the image pixels. + /// Luminance quantization table provided by the callee + /// Chrominance quantization table provided by the callee /// The token to monitor for cancellation. 
private void Encode444(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel @@ -135,6 +137,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// The pixel format. /// The pixel accessor providing access to the image pixels. + /// Luminance quantization table provided by the callee + /// Chrominance quantization table provided by the callee /// The token to monitor for cancellation. private void Encode420(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel @@ -203,6 +207,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// The pixel format. /// The pixel accessor providing access to the image pixels. + /// Luminance quantization table provided by the callee /// The token to monitor for cancellation. private void EncodeGrayscale(Image pixels, ref Block8x8F luminanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel From 3380bdf0d017dad810521d7e30197289f6495147 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 16:15:27 +0300 Subject: [PATCH 22/99] Renamed YCbCrEncoder to HuffmanScanEncoder as it is in decoding logic --- .../{YCbCrEncoder{TPixel}.cs => HuffmanScanEncoder.cs} | 4 ++-- src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename src/ImageSharp/Formats/Jpeg/Components/Encoder/{YCbCrEncoder{TPixel}.cs => HuffmanScanEncoder.cs} (99%) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs similarity index 99% rename from src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs rename to src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 8256348a8..72300e6fb 100644 --- 
a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrEncoder{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -10,7 +10,7 @@ using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { - internal class YCbCrEncoder + internal class HuffmanScanEncoder { /// /// Number of bytes cached before being written to target stream via Stream.Write(byte[], offest, count). @@ -72,7 +72,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder 8, 8, 8, }; - public YCbCrEncoder(Stream outputStream) + public HuffmanScanEncoder(Stream outputStream) { this.target = outputStream; } diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index 6b58ef483..e9a5f7e02 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -177,7 +177,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.WriteStartOfScan(image, componentCount, cancellationToken); // Write the scan compressed data. 
- new YCbCrEncoder(stream).WriteStartOfScan( + new HuffmanScanEncoder(stream).WriteStartOfScan( image, this.colorType, this.subsample, From 7e0a317461e8eba128c97bb205396d71ae687a6d Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 16:54:09 +0300 Subject: [PATCH 23/99] Moved encode method choice to the JpegEncoderCore --- .../Components/Encoder/HuffmanScanEncoder.cs | 41 +++++-------------- .../Formats/Jpeg/JpegEncoderCore.cs | 35 +++++++++------- 2 files changed, 32 insertions(+), 44 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 72300e6fb..0b05b955d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -85,7 +85,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// Luminance quantization table provided by the callee /// Chrominance quantization table provided by the callee /// The token to monitor for cancellation. - private void Encode444(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) + public void Encode444(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { var unzig = ZigZag.CreateUnzigTable(); @@ -129,6 +129,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref unzig); } } + + // Pad the last byte with 1's. + this.Emit(0x7f, 7); + this.target.Write(this.emitBuffer, 0, this.emitLen); } /// @@ -140,7 +144,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// Luminance quantization table provided by the callee /// Chrominance quantization table provided by the callee /// The token to monitor for cancellation. 
- private void Encode420(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) + public void Encode420(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) @@ -199,6 +203,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref unzig); } } + + // Pad the last byte with 1's. + this.Emit(0x7f, 7); + this.target.Write(this.emitBuffer, 0, this.emitLen); } @@ -209,7 +217,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The pixel accessor providing access to the image pixels. /// Luminance quantization table provided by the callee /// The token to monitor for cancellation. - private void EncodeGrayscale(Image pixels, ref Block8x8F luminanceQuantTable, CancellationToken cancellationToken) + public void EncodeGrayscale(Image pixels, ref Block8x8F luminanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { var unzig = ZigZag.CreateUnzigTable(); @@ -239,33 +247,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref unzig); } } - } - - public void WriteStartOfScan( - Image image, - JpegColorType? colorType, - JpegSubsample? 
subsample, - ref Block8x8F luminanceQuantTable, - ref Block8x8F chrominanceTable, - CancellationToken cancellationToken) - where TPixel : unmanaged, IPixel - { - if (colorType == JpegColorType.Luminance) - { - this.EncodeGrayscale(image, ref luminanceQuantTable, cancellationToken); - } - else - { - switch (subsample) - { - case JpegSubsample.Ratio444: - this.Encode444(image, ref luminanceQuantTable, ref chrominanceTable, cancellationToken); - break; - case JpegSubsample.Ratio420: - this.Encode420(image, ref luminanceQuantTable, ref chrominanceTable, cancellationToken); - break; - } - } // Pad the last byte with 1's. this.Emit(0x7f, 7); diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index e9a5f7e02..9ff334453 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -86,9 +86,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg private readonly int? quality; /// - /// Gets or sets the subsampling method to use. + /// Component count. /// - private readonly JpegColorType? colorType; + private readonly int componentCount; /// /// The output stream. All attempted writes after the first error become no-ops. @@ -103,7 +103,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg { this.quality = options.Quality; this.subsample = options.Subsample; - this.colorType = options.ColorType; + this.componentCount = (options.ColorType == JpegColorType.Luminance) ? 1 : 3; } /// @@ -129,9 +129,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.outputStream = stream; ImageMetadata metadata = image.Metadata; - // Compute number of components based on color type in options. - int componentCount = (this.colorType == JpegColorType.Luminance) ? 1 : 3; - // System.Drawing produces identical output for jpegs with a quality parameter of 0 and 1. int qlty = Numerics.Clamp(this.quality ?? metadata.GetJpegMetadata().Quality, 1, 100); this.subsample ??= qlty >= 91 ? 
JpegSubsample.Ratio444 : JpegSubsample.Ratio420; @@ -153,7 +150,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg Block8x8F luminanceQuantTable = default; Block8x8F chrominanceQuantTable = default; InitQuantizationTable(0, scale, ref luminanceQuantTable); - if (componentCount > 1) + if (this.componentCount > 1) { InitQuantizationTable(1, scale, ref chrominanceQuantTable); } @@ -177,13 +174,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.WriteStartOfScan(image, componentCount, cancellationToken); // Write the scan compressed data. - new HuffmanScanEncoder(stream).WriteStartOfScan( - image, - this.colorType, - this.subsample, - ref luminanceQuantTable, - ref chrominanceQuantTable, - cancellationToken); + var scanEncoder = new HuffmanScanEncoder(stream); + if (this.componentCount == 1) + { + scanEncoder.EncodeGrayscale(image, ref luminanceQuantTable, cancellationToken); + } + else + { + switch (subsample) + { + case JpegSubsample.Ratio444: + scanEncoder.Encode444(image, ref luminanceQuantTable, ref chrominanceQuantTable, cancellationToken); + break; + case JpegSubsample.Ratio420: + scanEncoder.Encode420(image, ref luminanceQuantTable, ref chrominanceQuantTable, cancellationToken); + break; + } + } // Write the End Of Image marker. 
this.buffer[0] = JpegConstants.Markers.XFF; From 1b1d136f8c860bed912809ef86e43100bb80987d Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 17:13:22 +0300 Subject: [PATCH 24/99] Fixed unresolved reference this.colorType --- src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index 9ff334453..b8568c4ab 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -587,7 +587,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg 0x01 }; - if (this.colorType == JpegColorType.Luminance) + if (this.componentCount == 1) { subsamples = stackalloc byte[] { From 5b05a0a1da0497661e98f499b5b482193c189c4e Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 17:35:40 +0300 Subject: [PATCH 25/99] Added QoL throw helper method for jpeg w/h size check before encoding --- src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs | 8 ++++---- src/ImageSharp/Formats/Jpeg/JpegThrowHelper.cs | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index b8568c4ab..169a3cbb7 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -118,14 +118,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg { Guard.NotNull(image, nameof(image)); Guard.NotNull(stream, nameof(stream)); - cancellationToken.ThrowIfCancellationRequested(); - const ushort max = JpegConstants.MaxLength; - if (image.Width >= max || image.Height >= max) + if (image.Width >= JpegConstants.MaxLength || image.Height >= JpegConstants.MaxLength) { - throw new ImageFormatException($"Image is too large to encode at {image.Width}x{image.Height}."); + JpegThrowHelper.ThrowDimensionsTooLarge(image.Width, image.Height); } + 
cancellationToken.ThrowIfCancellationRequested(); + this.outputStream = stream; ImageMetadata metadata = image.Metadata; diff --git a/src/ImageSharp/Formats/Jpeg/JpegThrowHelper.cs b/src/ImageSharp/Formats/Jpeg/JpegThrowHelper.cs index fa9eb8391..cc75870e1 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegThrowHelper.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegThrowHelper.cs @@ -46,5 +46,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg [MethodImpl(InliningOptions.ColdPath)] public static void ThrowInvalidImageDimensions(int width, int height) => throw new InvalidImageContentException($"Invalid image dimensions: {width}x{height}."); + + [MethodImpl(InliningOptions.ColdPath)] + public static void ThrowDimensionsTooLarge(int width, int height) => throw new ImageFormatException($"Image is too large to encode at {width}x{height} for JPEG format."); } } From 84a143d0951b59c657730a7f0f4df57b4cfa92ce Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 21 May 2021 17:38:55 +0300 Subject: [PATCH 26/99] Moved end of image marker writing code to a separate method --- src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index 169a3cbb7..744f82bda 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -193,9 +193,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg } // Write the End Of Image marker. - this.buffer[0] = JpegConstants.Markers.XFF; - this.buffer[1] = JpegConstants.Markers.EOI; - stream.Write(this.buffer, 0, 2); + this.WriteEndOfImageMarker(); + stream.Flush(); } @@ -695,6 +694,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.outputStream.Write(this.buffer, 0, sosSize + 2); } + /// + /// Writes the EndOfImage marker. 
+ /// + private void WriteEndOfImageMarker() + { + this.buffer[0] = JpegConstants.Markers.XFF; + this.buffer[1] = JpegConstants.Markers.EOI; + this.outputStream.Write(this.buffer, 0, 2); + } + /// /// Writes the header for a marker with the given length. /// From d4fa8b254bce6c82ee8cdd2b7fa1a5d27e766508 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 22 May 2021 08:17:31 +0300 Subject: [PATCH 27/99] Rolled back to initial JpegEncoderCore options implementation. --- src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index 744f82bda..b7459bdc7 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -86,9 +86,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg private readonly int? quality; /// - /// Component count. + /// Gets or sets the subsampling method to use. /// - private readonly int componentCount; + private readonly JpegColorType? colorType; /// /// The output stream. All attempted writes after the first error become no-ops. @@ -103,7 +103,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg { this.quality = options.Quality; this.subsample = options.Subsample; - this.componentCount = (options.ColorType == JpegColorType.Luminance) ? 1 : 3; + this.colorType = options.ColorType; } /// @@ -129,6 +129,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg this.outputStream = stream; ImageMetadata metadata = image.Metadata; + // Compute number of components based on color type in options. + int componentCount = (this.colorType == JpegColorType.Luminance) ? 1 : 3; + // System.Drawing produces identical output for jpegs with a quality parameter of 0 and 1. int qlty = Numerics.Clamp(this.quality ?? metadata.GetJpegMetadata().Quality, 1, 100); this.subsample ??= qlty >= 91 ? 
JpegSubsample.Ratio444 : JpegSubsample.Ratio420; @@ -150,7 +153,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg Block8x8F luminanceQuantTable = default; Block8x8F chrominanceQuantTable = default; InitQuantizationTable(0, scale, ref luminanceQuantTable); - if (this.componentCount > 1) + if (componentCount > 1) { InitQuantizationTable(1, scale, ref chrominanceQuantTable); } @@ -175,7 +178,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg // Write the scan compressed data. var scanEncoder = new HuffmanScanEncoder(stream); - if (this.componentCount == 1) + if (this.colorType == JpegColorType.Luminance) { scanEncoder.EncodeGrayscale(image, ref luminanceQuantTable, cancellationToken); } @@ -586,7 +589,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg 0x01 }; - if (this.componentCount == 1) + if (this.colorType == JpegColorType.Luminance) { subsamples = stackalloc byte[] { From 980f2d2e7f17d98c7cad64b518590c23d457961e Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 22 May 2021 08:29:45 +0300 Subject: [PATCH 28/99] Revert "Block8x8F.MultiplyInPlace no longer use unsafe casts" This reverts commit fbf0ff1466ef410de2fb77d22c6cdef074cad6ce. 
--- .../Formats/Jpeg/Components/Block8x8F.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 9072ca196..91aec3005 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -313,14 +313,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components if (Avx.IsSupported) { var valueVec = Vector256.Create(value); - this.V0 = Avx.Multiply(this.V0, valueVec); - this.V1 = Avx.Multiply(this.V1, valueVec); - this.V2 = Avx.Multiply(this.V2, valueVec); - this.V3 = Avx.Multiply(this.V3, valueVec); - this.V4 = Avx.Multiply(this.V4, valueVec); - this.V5 = Avx.Multiply(this.V5, valueVec); - this.V6 = Avx.Multiply(this.V6, valueVec); - this.V7 = Avx.Multiply(this.V7, valueVec); + Unsafe.As>(ref this.V0L) = Avx.Multiply(Unsafe.As>(ref this.V0L), valueVec); + Unsafe.As>(ref this.V1L) = Avx.Multiply(Unsafe.As>(ref this.V1L), valueVec); + Unsafe.As>(ref this.V2L) = Avx.Multiply(Unsafe.As>(ref this.V2L), valueVec); + Unsafe.As>(ref this.V3L) = Avx.Multiply(Unsafe.As>(ref this.V3L), valueVec); + Unsafe.As>(ref this.V4L) = Avx.Multiply(Unsafe.As>(ref this.V4L), valueVec); + Unsafe.As>(ref this.V5L) = Avx.Multiply(Unsafe.As>(ref this.V5L), valueVec); + Unsafe.As>(ref this.V6L) = Avx.Multiply(Unsafe.As>(ref this.V6L), valueVec); + Unsafe.As>(ref this.V7L) = Avx.Multiply(Unsafe.As>(ref this.V7L), valueVec); } else #endif From f1886add1639105fe89050f18feb7fa8d00423f7 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 22 May 2021 08:29:48 +0300 Subject: [PATCH 29/99] Revert "Block8x8F.TransposeInto no longer uses unsafe casts (partially)" This reverts commit 20236b8c756ecbd6fd75c789b58dca5ed028d1e9. 
--- .../Formats/Jpeg/Components/Block8x8F.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 91aec3005..dbc22eaea 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -840,26 +840,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Vector256 t0 = Avx.UnpackLow(r0, r1); Vector256 t2 = Avx.UnpackLow(r2, r3); Vector256 v = Avx.Shuffle(t0, t2, 0x4E); - d.V0 = Avx.Blend(t0, v, 0xCC); - d.V1 = Avx.Blend(t2, v, 0x33); + Unsafe.As>(ref d.V0L) = Avx.Blend(t0, v, 0xCC); + Unsafe.As>(ref d.V1L) = Avx.Blend(t2, v, 0x33); Vector256 t4 = Avx.UnpackLow(r4, r5); Vector256 t6 = Avx.UnpackLow(r6, r7); v = Avx.Shuffle(t4, t6, 0x4E); - d.V4 = Avx.Blend(t4, v, 0xCC); - d.V5 = Avx.Blend(t6, v, 0x33); + Unsafe.As>(ref d.V4L) = Avx.Blend(t4, v, 0xCC); + Unsafe.As>(ref d.V5L) = Avx.Blend(t6, v, 0x33); Vector256 t1 = Avx.UnpackHigh(r0, r1); Vector256 t3 = Avx.UnpackHigh(r2, r3); v = Avx.Shuffle(t1, t3, 0x4E); - d.V2 = Avx.Blend(t1, v, 0xCC); - d.V3 = Avx.Blend(t3, v, 0x33); + Unsafe.As>(ref d.V2L) = Avx.Blend(t1, v, 0xCC); + Unsafe.As>(ref d.V3L) = Avx.Blend(t3, v, 0x33); Vector256 t5 = Avx.UnpackHigh(r4, r5); Vector256 t7 = Avx.UnpackHigh(r6, r7); v = Avx.Shuffle(t5, t7, 0x4E); - d.V6 = Avx.Blend(t5, v, 0xCC); - d.V7 = Avx.Blend(t7, v, 0x33); + Unsafe.As>(ref d.V6L) = Avx.Blend(t5, v, 0xCC); + Unsafe.As>(ref d.V7L) = Avx.Blend(t7, v, 0x33); } else #endif From a8f717d7815e6a8c9b31e4a06b715368f7c1378b Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 22 May 2021 09:50:40 +0300 Subject: [PATCH 30/99] Made DCT code prettier with SimdUtils, added summary to 8x8 dct methods, added debug assertion --- .../Components/FastFloatingPointDCT.IDCT.cs | 59 +++++-------------- .../Jpeg/Components/FastFloatingPointDCT.cs | 19 +++--- 2 files changed, 25 insertions(+), 53 
deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs index fd3ad8d5f..369172a2d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; #if SUPPORTS_RUNTIME_INTRINSICS @@ -171,14 +172,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components d.V4R = my3 - mb3; } -#if SUPPORTS_RUNTIME_INTRINSICS /// - /// Do IDCT internal operations on the given block. + /// Combined operation of and + /// using AVX commands. /// /// Source /// Destination public static void IDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d) { +#if SUPPORTS_RUNTIME_INTRINSICS + Debug.Assert(Avx.IsSupported, "AVX is required to execute this method"); + Vector256 my1 = s.V1; Vector256 my7 = s.V7; Vector256 mz0 = Avx.Add(my1, my7); @@ -191,40 +195,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Vector256 mz4 = Avx.Multiply(Avx.Add(mz0, mz1), C_V_1_1758); - if (Fma.IsSupported) - { - mz2 = Fma.MultiplyAdd(mz2, C_V_n1_9615, mz4); - mz3 = Fma.MultiplyAdd(mz3, C_V_n0_3901, mz4); - } - else - { - mz2 = Avx.Add(Avx.Multiply(mz2, C_V_n1_9615), mz4); - mz3 = Avx.Add(Avx.Multiply(mz3, C_V_n0_3901), mz4); - } - + mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz2, C_V_n1_9615); + mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz3, C_V_n0_3901); mz0 = Avx.Multiply(mz0, C_V_n0_8999); mz1 = Avx.Multiply(mz1, C_V_n2_5629); + Vector256 mb3 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my7, C_V_0_2986), mz2); + Vector256 mb2 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my5, C_V_2_0531), mz3); + Vector256 mb1 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my3, C_V_3_0727), mz2); + Vector256 mb0 = 
Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my1, C_V_1_5013), mz3); - Unsafe.SkipInit(out Vector256 mb3); - Unsafe.SkipInit(out Vector256 mb2); - Unsafe.SkipInit(out Vector256 mb1); - Unsafe.SkipInit(out Vector256 mb0); - - if (Fma.IsSupported) - { - mb3 = Avx.Add(Fma.MultiplyAdd(my7, C_V_0_2986, mz0), mz2); - mb2 = Avx.Add(Fma.MultiplyAdd(my5, C_V_2_0531, mz1), mz3); - mb1 = Avx.Add(Fma.MultiplyAdd(my3, C_V_3_0727, mz1), mz2); - mb0 = Avx.Add(Fma.MultiplyAdd(my1, C_V_1_5013, mz0), mz3); - } - else - { - mb3 = Avx.Add(Avx.Add(Avx.Multiply(my7, C_V_0_2986), mz0), mz2); - mb2 = Avx.Add(Avx.Add(Avx.Multiply(my5, C_V_2_0531), mz1), mz3); - mb1 = Avx.Add(Avx.Add(Avx.Multiply(my3, C_V_3_0727), mz1), mz2); - mb0 = Avx.Add(Avx.Add(Avx.Multiply(my1, C_V_1_5013), mz0), mz3); - } Vector256 my2 = s.V2; Vector256 my6 = s.V6; @@ -233,17 +213,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Vector256 my4 = s.V4; mz0 = Avx.Add(my0, my4); mz1 = Avx.Subtract(my0, my4); - - if (Fma.IsSupported) - { - mz2 = Fma.MultiplyAdd(my6, C_V_n1_8477, mz4); - mz3 = Fma.MultiplyAdd(my2, C_V_0_7653, mz4); - } - else - { - mz2 = Avx.Add(Avx.Multiply(my6, C_V_n1_8477), mz4); - mz3 = Avx.Add(Avx.Multiply(my2, C_V_0_7653), mz4); - } + mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my6, C_V_n1_8477); + mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my2, C_V_0_7653); my0 = Avx.Add(mz0, mz3); my3 = Avx.Subtract(mz0, mz3); @@ -258,7 +229,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components d.V5 = Avx.Subtract(my2, mb2); d.V3 = Avx.Add(my3, mb3); d.V4 = Avx.Subtract(my3, mb3); - } #endif + } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index 4ef4ab7b0..493c0a688 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. 
// Licensed under the Apache License, Version 2.0. +using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; #if SUPPORTS_RUNTIME_INTRINSICS @@ -196,14 +197,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components d.V7R = c0 - c3; } -#if SUPPORTS_RUNTIME_INTRINSICS /// - /// + /// Combined operation of and + /// using AVX commands. /// /// Source /// Destination private static void FDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d) { +#if SUPPORTS_RUNTIME_INTRINSICS + Debug.Assert(Avx.IsSupported, "AVX is required to execute this method"); + Vector256 t0 = Avx.Add(s.V0, s.V7); Vector256 t7 = Avx.Subtract(s.V0, s.V7); Vector256 t1 = Avx.Add(s.V1, s.V6); @@ -224,36 +228,33 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Vector256 c2 = Avx.Subtract(t1, t2); // 2 6 + d.V2 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(c2, C_V_0_5411), c3, C_V_1_3065); if (Fma.IsSupported) { - d.V2 = Fma.MultiplyAdd(c2, C_V_0_5411, Avx.Multiply(c3, C_V_1_3065)); d.V6 = Fma.MultiplySubtract(c3, C_V_0_5411, Avx.Multiply(c2, C_V_1_3065)); } else { - d.V2 = Avx.Add(Avx.Multiply(c2, C_V_0_5411), Avx.Multiply(c3, C_V_1_3065)); d.V6 = Avx.Subtract(Avx.Multiply(c3, C_V_0_5411), Avx.Multiply(c2, C_V_1_3065)); } + c3 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(t4, C_V_1_1758), t7, C_V_0_7856); if (Fma.IsSupported) { - c3 = Fma.MultiplyAdd(t4, C_V_1_1758, Avx.Multiply(t7, C_V_0_7856)); c0 = Fma.MultiplySubtract(t7, C_V_1_1758, Avx.Multiply(t4, C_V_0_7856)); } else { - c3 = Avx.Add(Avx.Multiply(t4, C_V_1_1758), Avx.Multiply(t7, C_V_0_7856)); c0 = Avx.Subtract(Avx.Multiply(t7, C_V_1_1758), Avx.Multiply(t4, C_V_0_7856)); } + c2 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(t5, C_V_1_3870), C_V_0_2758, t6); if (Fma.IsSupported) { - c2 = Fma.MultiplyAdd(t5, C_V_1_3870, Avx.Multiply(C_V_0_2758, t6)); c1 = Fma.MultiplySubtract(t6, C_V_1_3870, Avx.Multiply(C_V_0_2758, t5)); } else { - c2 = Avx.Add(Avx.Multiply(t5, C_V_1_3870), Avx.Multiply(C_V_0_2758, 
t6)); c1 = Avx.Subtract(Avx.Multiply(t6, C_V_1_3870), Avx.Multiply(C_V_0_2758, t5)); } @@ -267,8 +268,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components // 1 7 d.V1 = Avx.Add(c0, c3); d.V7 = Avx.Subtract(c0, c3); - } #endif + } /// /// Performs 8x8 matrix Forward Discrete Cosine Transform From dfb181db8ab693224b7d1f88b669a501f50c409b Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 22 May 2021 09:52:12 +0300 Subject: [PATCH 31/99] Combined FDCT and IDCT code into single file --- .../Components/FastFloatingPointDCT.IDCT.cs | 235 ------------------ .../Jpeg/Components/FastFloatingPointDCT.cs | 214 ++++++++++++++++ 2 files changed, 214 insertions(+), 235 deletions(-) delete mode 100644 src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs deleted file mode 100644 index 369172a2d..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.IDCT.cs +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Diagnostics; -using System.Numerics; -using System.Runtime.CompilerServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -#endif - -// ReSharper disable InconsistentNaming -namespace SixLabors.ImageSharp.Formats.Jpeg.Components -{ - /// - /// Contains inaccurate, but fast forward and inverse DCT implementations. - /// - internal static partial class FastFloatingPointDCT - { - /// - /// Apply floating point IDCT transformation into dest, using a temporary block 'temp' provided by the caller (optimization). 
- /// Ported from https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L239 - /// - /// Source - /// Destination - /// Temporary block provided by the caller - public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp) - { - src.TransposeInto(ref temp); - - IDCT8x8(ref temp, ref dest); - dest.TransposeInto(ref temp); - IDCT8x8(ref temp, ref dest); - - // TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing? - dest.MultiplyInPlace(C_0_125); - } - - /// - /// Performs 8x8 matrix Inverse Discrete Cosine Transform - /// - /// Source - /// Destination - public static void IDCT8x8(ref Block8x8F s, ref Block8x8F d) - { -#if SUPPORTS_RUNTIME_INTRINSICS - if (Avx.IsSupported) - { - IDCT8x8_Avx(ref s, ref d); - } - else -#endif - { - IDCT8x4_LeftPart(ref s, ref d); - IDCT8x4_RightPart(ref s, ref d); - } - } - - /// - /// Do IDCT internal operations on the left part of the block. Original src: - /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 - /// - /// The source block - /// Destination block - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void IDCT8x4_LeftPart(ref Block8x8F s, ref Block8x8F d) - { - Vector4 my1 = s.V1L; - Vector4 my7 = s.V7L; - Vector4 mz0 = my1 + my7; - - Vector4 my3 = s.V3L; - Vector4 mz2 = my3 + my7; - Vector4 my5 = s.V5L; - Vector4 mz1 = my3 + my5; - Vector4 mz3 = my1 + my5; - - Vector4 mz4 = (mz0 + mz1) * C_1_175876; - - mz2 = (mz2 * C_1_961571) + mz4; - mz3 = (mz3 * C_0_390181) + mz4; - mz0 = mz0 * C_0_899976; - mz1 = mz1 * C_2_562915; - - Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2; - Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3; - Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2; - Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3; - - Vector4 my2 = s.V2L; - Vector4 my6 = s.V6L; - mz4 = (my2 + my6) * C_0_541196; - Vector4 my0 = s.V0L; - Vector4 my4 = s.V4L; - mz0 = my0 + my4; - mz1 = my0 - my4; - - mz2 = mz4 
+ (my6 * C_1_847759); - mz3 = mz4 + (my2 * C_0_765367); - - my0 = mz0 + mz3; - my3 = mz0 - mz3; - my1 = mz1 + mz2; - my2 = mz1 - mz2; - - d.V0L = my0 + mb0; - d.V7L = my0 - mb0; - d.V1L = my1 + mb1; - d.V6L = my1 - mb1; - d.V2L = my2 + mb2; - d.V5L = my2 - mb2; - d.V3L = my3 + mb3; - d.V4L = my3 - mb3; - } - - /// - /// Do IDCT internal operations on the right part of the block. - /// Original src: - /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 - /// - /// The source block - /// The destination block - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void IDCT8x4_RightPart(ref Block8x8F s, ref Block8x8F d) - { - Vector4 my1 = s.V1R; - Vector4 my7 = s.V7R; - Vector4 mz0 = my1 + my7; - - Vector4 my3 = s.V3R; - Vector4 mz2 = my3 + my7; - Vector4 my5 = s.V5R; - Vector4 mz1 = my3 + my5; - Vector4 mz3 = my1 + my5; - - Vector4 mz4 = (mz0 + mz1) * C_1_175876; - - mz2 = (mz2 * C_1_961571) + mz4; - mz3 = (mz3 * C_0_390181) + mz4; - mz0 = mz0 * C_0_899976; - mz1 = mz1 * C_2_562915; - - Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2; - Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3; - Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2; - Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3; - - Vector4 my2 = s.V2R; - Vector4 my6 = s.V6R; - mz4 = (my2 + my6) * C_0_541196; - Vector4 my0 = s.V0R; - Vector4 my4 = s.V4R; - mz0 = my0 + my4; - mz1 = my0 - my4; - - mz2 = mz4 + (my6 * C_1_847759); - mz3 = mz4 + (my2 * C_0_765367); - - my0 = mz0 + mz3; - my3 = mz0 - mz3; - my1 = mz1 + mz2; - my2 = mz1 - mz2; - - d.V0R = my0 + mb0; - d.V7R = my0 - mb0; - d.V1R = my1 + mb1; - d.V6R = my1 - mb1; - d.V2R = my2 + mb2; - d.V5R = my2 - mb2; - d.V3R = my3 + mb3; - d.V4R = my3 - mb3; - } - - /// - /// Combined operation of and - /// using AVX commands. 
- /// - /// Source - /// Destination - public static void IDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d) - { -#if SUPPORTS_RUNTIME_INTRINSICS - Debug.Assert(Avx.IsSupported, "AVX is required to execute this method"); - - Vector256 my1 = s.V1; - Vector256 my7 = s.V7; - Vector256 mz0 = Avx.Add(my1, my7); - - Vector256 my3 = s.V3; - Vector256 mz2 = Avx.Add(my3, my7); - Vector256 my5 = s.V5; - Vector256 mz1 = Avx.Add(my3, my5); - Vector256 mz3 = Avx.Add(my1, my5); - - Vector256 mz4 = Avx.Multiply(Avx.Add(mz0, mz1), C_V_1_1758); - - mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz2, C_V_n1_9615); - mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz3, C_V_n0_3901); - mz0 = Avx.Multiply(mz0, C_V_n0_8999); - mz1 = Avx.Multiply(mz1, C_V_n2_5629); - - Vector256 mb3 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my7, C_V_0_2986), mz2); - Vector256 mb2 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my5, C_V_2_0531), mz3); - Vector256 mb1 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my3, C_V_3_0727), mz2); - Vector256 mb0 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my1, C_V_1_5013), mz3); - - - Vector256 my2 = s.V2; - Vector256 my6 = s.V6; - mz4 = Avx.Multiply(Avx.Add(my2, my6), C_V_0_5411); - Vector256 my0 = s.V0; - Vector256 my4 = s.V4; - mz0 = Avx.Add(my0, my4); - mz1 = Avx.Subtract(my0, my4); - mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my6, C_V_n1_8477); - mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my2, C_V_0_7653); - - my0 = Avx.Add(mz0, mz3); - my3 = Avx.Subtract(mz0, mz3); - my1 = Avx.Add(mz1, mz2); - my2 = Avx.Subtract(mz1, mz2); - - d.V0 = Avx.Add(my0, mb0); - d.V7 = Avx.Subtract(my0, mb0); - d.V1 = Avx.Add(my1, mb1); - d.V6 = Avx.Subtract(my1, mb1); - d.V2 = Avx.Add(my2, mb2); - d.V5 = Avx.Subtract(my2, mb2); - d.V3 = Avx.Add(my3, mb3); - d.V4 = Avx.Subtract(my3, mb3); -#endif - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index 
493c0a688..d7101abfd 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -319,5 +319,219 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components dest.MultiplyInPlace(C_0_125); } + + /// + /// Performs 8x8 matrix Inverse Discrete Cosine Transform + /// + /// Source + /// Destination + public static void IDCT8x8(ref Block8x8F s, ref Block8x8F d) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + IDCT8x8_Avx(ref s, ref d); + } + else +#endif + { + IDCT8x4_LeftPart(ref s, ref d); + IDCT8x4_RightPart(ref s, ref d); + } + } + + /// + /// Do IDCT internal operations on the left part of the block. Original src: + /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 + /// + /// The source block + /// Destination block + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void IDCT8x4_LeftPart(ref Block8x8F s, ref Block8x8F d) + { + Vector4 my1 = s.V1L; + Vector4 my7 = s.V7L; + Vector4 mz0 = my1 + my7; + + Vector4 my3 = s.V3L; + Vector4 mz2 = my3 + my7; + Vector4 my5 = s.V5L; + Vector4 mz1 = my3 + my5; + Vector4 mz3 = my1 + my5; + + Vector4 mz4 = (mz0 + mz1) * C_1_175876; + + mz2 = (mz2 * C_1_961571) + mz4; + mz3 = (mz3 * C_0_390181) + mz4; + mz0 = mz0 * C_0_899976; + mz1 = mz1 * C_2_562915; + + Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2; + Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3; + Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2; + Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3; + + Vector4 my2 = s.V2L; + Vector4 my6 = s.V6L; + mz4 = (my2 + my6) * C_0_541196; + Vector4 my0 = s.V0L; + Vector4 my4 = s.V4L; + mz0 = my0 + my4; + mz1 = my0 - my4; + + mz2 = mz4 + (my6 * C_1_847759); + mz3 = mz4 + (my2 * C_0_765367); + + my0 = mz0 + mz3; + my3 = mz0 - mz3; + my1 = mz1 + mz2; + my2 = mz1 - mz2; + + d.V0L = my0 + mb0; + d.V7L = my0 - mb0; + d.V1L = my1 + mb1; + d.V6L = my1 - mb1; + d.V2L = my2 + mb2; + d.V5L = my2 - mb2; 
+ d.V3L = my3 + mb3; + d.V4L = my3 - mb3; + } + + /// + /// Do IDCT internal operations on the right part of the block. + /// Original src: + /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 + /// + /// The source block + /// The destination block + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void IDCT8x4_RightPart(ref Block8x8F s, ref Block8x8F d) + { + Vector4 my1 = s.V1R; + Vector4 my7 = s.V7R; + Vector4 mz0 = my1 + my7; + + Vector4 my3 = s.V3R; + Vector4 mz2 = my3 + my7; + Vector4 my5 = s.V5R; + Vector4 mz1 = my3 + my5; + Vector4 mz3 = my1 + my5; + + Vector4 mz4 = (mz0 + mz1) * C_1_175876; + + mz2 = (mz2 * C_1_961571) + mz4; + mz3 = (mz3 * C_0_390181) + mz4; + mz0 = mz0 * C_0_899976; + mz1 = mz1 * C_2_562915; + + Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2; + Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3; + Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2; + Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3; + + Vector4 my2 = s.V2R; + Vector4 my6 = s.V6R; + mz4 = (my2 + my6) * C_0_541196; + Vector4 my0 = s.V0R; + Vector4 my4 = s.V4R; + mz0 = my0 + my4; + mz1 = my0 - my4; + + mz2 = mz4 + (my6 * C_1_847759); + mz3 = mz4 + (my2 * C_0_765367); + + my0 = mz0 + mz3; + my3 = mz0 - mz3; + my1 = mz1 + mz2; + my2 = mz1 - mz2; + + d.V0R = my0 + mb0; + d.V7R = my0 - mb0; + d.V1R = my1 + mb1; + d.V6R = my1 - mb1; + d.V2R = my2 + mb2; + d.V5R = my2 - mb2; + d.V3R = my3 + mb3; + d.V4R = my3 - mb3; + } + + /// + /// Combined operation of and + /// using AVX commands. 
+ /// + /// Source + /// Destination + public static void IDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d) + { +#if SUPPORTS_RUNTIME_INTRINSICS + Debug.Assert(Avx.IsSupported, "AVX is required to execute this method"); + + Vector256 my1 = s.V1; + Vector256 my7 = s.V7; + Vector256 mz0 = Avx.Add(my1, my7); + + Vector256 my3 = s.V3; + Vector256 mz2 = Avx.Add(my3, my7); + Vector256 my5 = s.V5; + Vector256 mz1 = Avx.Add(my3, my5); + Vector256 mz3 = Avx.Add(my1, my5); + + Vector256 mz4 = Avx.Multiply(Avx.Add(mz0, mz1), C_V_1_1758); + + mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz2, C_V_n1_9615); + mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz3, C_V_n0_3901); + mz0 = Avx.Multiply(mz0, C_V_n0_8999); + mz1 = Avx.Multiply(mz1, C_V_n2_5629); + + Vector256 mb3 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my7, C_V_0_2986), mz2); + Vector256 mb2 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my5, C_V_2_0531), mz3); + Vector256 mb1 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my3, C_V_3_0727), mz2); + Vector256 mb0 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my1, C_V_1_5013), mz3); + + + Vector256 my2 = s.V2; + Vector256 my6 = s.V6; + mz4 = Avx.Multiply(Avx.Add(my2, my6), C_V_0_5411); + Vector256 my0 = s.V0; + Vector256 my4 = s.V4; + mz0 = Avx.Add(my0, my4); + mz1 = Avx.Subtract(my0, my4); + mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my6, C_V_n1_8477); + mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my2, C_V_0_7653); + + my0 = Avx.Add(mz0, mz3); + my3 = Avx.Subtract(mz0, mz3); + my1 = Avx.Add(mz1, mz2); + my2 = Avx.Subtract(mz1, mz2); + + d.V0 = Avx.Add(my0, mb0); + d.V7 = Avx.Subtract(my0, mb0); + d.V1 = Avx.Add(my1, mb1); + d.V6 = Avx.Subtract(my1, mb1); + d.V2 = Avx.Add(my2, mb2); + d.V5 = Avx.Subtract(my2, mb2); + d.V3 = Avx.Add(my3, mb3); + d.V4 = Avx.Subtract(my3, mb3); +#endif + } + + /// + /// Apply floating point IDCT transformation into dest, using a temporary block 'temp' provided by the caller (optimization). 
+ /// Ported from https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L239 + /// + /// Source + /// Destination + /// Temporary block provided by the caller + public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp) + { + src.TransposeInto(ref temp); + + IDCT8x8(ref temp, ref dest); + dest.TransposeInto(ref temp); + IDCT8x8(ref temp, ref dest); + + // TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing? + dest.MultiplyInPlace(C_0_125); + } } } From 0424d8db71a9d216e51e118a83655b9a6d41be45 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 22 May 2021 11:31:55 +0300 Subject: [PATCH 32/99] Codestyle changes --- .../Jpeg/Components/Encoder/HuffmanScanEncoder.cs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 0b05b955d..8b23211d3 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -24,7 +24,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// A buffer for reducing the number of stream writes when emitting Huffman tables. /// - private byte[] emitBuffer = new byte[EmitBufferSizeInBytes]; + private readonly byte[] emitBuffer = new byte[EmitBufferSizeInBytes]; /// /// Number of filled bytes in buffer @@ -47,7 +47,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// The output stream. All attempted writes after the first error become no-ops. /// - private Stream target; + private readonly Stream target; + + public HuffmanScanEncoder(Stream outputStream) + { + this.target = outputStream; + } /// /// Gets the counts the number of bits needed to hold an integer. 
@@ -72,11 +77,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder 8, 8, 8, }; - public HuffmanScanEncoder(Stream outputStream) - { - this.target = outputStream; - } - /// /// Encodes the image with no subsampling. /// @@ -209,7 +209,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder this.target.Write(this.emitBuffer, 0, this.emitLen); } - /// /// Encodes the image with no chroma, just luminance. /// From d12bb3e648d9dcb7242e49f36b80274063ea0c0b Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 24 May 2021 15:47:32 +0300 Subject: [PATCH 33/99] Improved jpeg encoding benchmark, updated benchmark 'baseline' for current encoding implementation --- .../Codecs/Jpeg/EncodeJpeg.cs | 63 ++++++++++++++----- 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs index 5a9ceea94..839f19e87 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs @@ -4,6 +4,7 @@ using System.Drawing.Imaging; using System.IO; using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg; using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Tests; using SDImage = System.Drawing.Image; @@ -12,10 +13,23 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { public class EncodeJpeg { - // System.Drawing needs this. 
- private Stream bmpStream; + private const string TestImage = TestImages.Jpeg.BenchmarkSuite.Jpeg420Exif_MidSizeYCbCr; + private const int EncodingQuality = 100; + + // GDI+ uses 4:1:1 subsampling - https://stackoverflow.com/questions/745610/how-to-disable-subsampling-with-net-gdi + // ImageSharp lowest subsampling is 4:2:0 which is an okay approximation + private const JpegSubsample EncodingSubsampling = JpegSubsample.Ratio420; + + // System.Drawing private SDImage bmpDrawing; + private Stream bmpStream; + private ImageCodecInfo jpegCodec; + private EncoderParameters encoderParameters; + + // ImageSharp private Image bmpCore; + private JpegEncoder encoder; + private MemoryStream destinationStream; [GlobalSetup] @@ -23,12 +37,19 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { if (this.bmpStream == null) { - const string TestImage = TestImages.Jpeg.BenchmarkSuite.Jpeg420Exif_MidSizeYCbCr; this.bmpStream = File.OpenRead(Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, TestImage)); + this.bmpCore = Image.Load(this.bmpStream); this.bmpCore.Metadata.ExifProfile = null; + this.encoder = new JpegEncoder { Quality = EncodingQuality, Subsample = EncodingSubsampling }; + this.bmpStream.Position = 0; this.bmpDrawing = SDImage.FromStream(this.bmpStream); + this.jpegCodec = GetEncoder(ImageFormat.Jpeg); + this.encoderParameters = new EncoderParameters(1); + // Quality cast to long is necessary + this.encoderParameters.Param[0] = new EncoderParameter(Encoder.Quality, (long)EncodingQuality); + this.destinationStream = new MemoryStream(); } } @@ -45,29 +66,43 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg [Benchmark(Baseline = true, Description = "System.Drawing Jpeg")] public void JpegSystemDrawing() { - this.bmpDrawing.Save(this.destinationStream, ImageFormat.Jpeg); + this.bmpDrawing.Save(this.destinationStream, this.jpegCodec, this.encoderParameters); this.destinationStream.Seek(0, SeekOrigin.Begin); } [Benchmark(Description = "ImageSharp Jpeg")] 
public void JpegCore() { - this.bmpCore.SaveAsJpeg(this.destinationStream); + this.bmpCore.SaveAsJpeg(this.destinationStream, this.encoder); this.destinationStream.Seek(0, SeekOrigin.Begin); } + + // https://docs.microsoft.com/en-us/dotnet/api/system.drawing.imaging.encoderparameter?redirectedfrom=MSDN&view=net-5.0 + private static ImageCodecInfo GetEncoder(ImageFormat format) + { + ImageCodecInfo[] codecs = ImageCodecInfo.GetImageDecoders(); + foreach (ImageCodecInfo codec in codecs) + { + if (codec.FormatID == format.Guid) + { + return codec; + } + } + return null; + } } } /* -BenchmarkDotNet=v0.12.1, OS=Windows 10.0.18363.959 (1909/November2018Update/19H2) -Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores -.NET Core SDK=3.1.302 - [Host] : .NET Core 3.1.6 (CoreCLR 4.700.20.26901, CoreFX 4.700.20.31603), X64 RyuJIT - DefaultJob : .NET Core 3.1.6 (CoreCLR 4.700.20.26901, CoreFX 4.700.20.31603), X64 RyuJIT +BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19042 +Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores +.NET Core SDK=6.0.100-preview.3.21202.5 + [Host] : .NET Core 3.1.13 (CoreCLR 4.700.21.11102, CoreFX 4.700.21.11602), X64 RyuJIT + DefaultJob : .NET Core 3.1.13 (CoreCLR 4.700.21.11102, CoreFX 4.700.21.11602), X64 RyuJIT -| Method | Mean | Error | StdDev | Ratio | RatioSD | -|---------------------- |---------:|----------:|----------:|------:|--------:| -| 'System.Drawing Jpeg' | 4.297 ms | 0.0244 ms | 0.0228 ms | 1.00 | 0.00 | -| 'ImageSharp Jpeg' | 5.286 ms | 0.1034 ms | 0.0967 ms | 1.23 | 0.02 | +| Method | Mean | Error | StdDev | Ratio | RatioSD | +|---------------------- |---------:|---------:|---------:|------:|--------:| +| 'System.Drawing Jpeg' | 39.54 ms | 0.269 ms | 0.225 ms | 1.00 | 0.00 | +| 'ImageSharp Jpeg' | 47.25 ms | 0.937 ms | 1.219 ms | 1.20 | 0.02 | */ From ae85722da6fe06f7ee68422e58af4f8830170aab Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 24 May 2021 16:33:47 +0300 
Subject: [PATCH 34/99] Simplified WriteDefineHuffmanTables method --- .../Formats/Jpeg/JpegEncoderCore.cs | 34 ++----------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index b7459bdc7..c68c0ffb0 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -296,40 +296,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg markerlen += 1 + 16 + s.Values.Length; } - // TODO: this magic constant (array size) should be defined by HuffmanSpec class - // This is a one-time call which can be stackalloc'ed or allocated directly in memory as method local array - // Allocation here would be better for GC so it won't live for entire encoding process - // TODO: if this is allocated on the heap - pin it right here or following copy code will corrupt memory - Span huffmanBuffer = stackalloc byte[179]; - byte* huffmanBufferPtr = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(huffmanBuffer)); - this.WriteMarkerHeader(JpegConstants.Markers.DHT, markerlen); for (int i = 0; i < specs.Length; i++) { - ref HuffmanSpec spec = ref specs[i]; - - int len = 0; - - // header - huffmanBuffer[len++] = headers[i]; - - // count - fixed (byte* countPtr = spec.Count) - { - int countLen = spec.Count.Length; - Unsafe.CopyBlockUnaligned(huffmanBufferPtr + len, countPtr, (uint)countLen); - len += countLen; - } - - // values - fixed (byte* valuesPtr = spec.Values) - { - int valuesLen = spec.Values.Length; - Unsafe.CopyBlockUnaligned(huffmanBufferPtr + len, valuesPtr, (uint)valuesLen); - len += valuesLen; - } - - this.outputStream.Write(huffmanBuffer, 0, len); + this.outputStream.WriteByte(headers[i]); + this.outputStream.Write(specs[i].Count); + this.outputStream.Write(specs[i].Values); } } From a65e50377de0c08c715d08b93ac5c2202e546150 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Tue, 25 May 2021 14:45:26 +0300 Subject: 
[PATCH 35/99] Added MultiplySubstract method to the HwIntrinsics --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 4faf577fd..00c0d89f0 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -532,6 +532,7 @@ namespace SixLabors.ImageSharp /// /// Performs a multiplication and an addition of the . /// + /// ret = (vm0 * vm1) + va /// The vector to add to the intermediate result. /// The first vector to multiply. /// The second vector to multiply. @@ -552,6 +553,31 @@ namespace SixLabors.ImageSharp } } + /// + /// Performs a multiplication and a subtraction of the . + /// + /// ret = (vm0 * vm1) - vs + /// The vector to subtract from the intermediate result. + /// The first vector to multiply. + /// The second vector to multiply. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplySubstract( + in Vector256 vs, + in Vector256 vm0, + in Vector256 vm1) + { + if (Fma.IsSupported) + { + return Fma.MultiplySubtract(vm1, vm0, vs); + } + else + { + return Avx.Subtract(Avx.Multiply(vm0, vm1), vs); + } + } + + /// /// as many elements as possible, slicing them down (keeping the remainder). /// From 86abb73799c4792036713493d4ccfea2b355ad4a Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Tue, 25 May 2021 14:57:48 +0300 Subject: [PATCH 36/99] Made FDCT8x8_Avx(...) 
method prettier with SimdUtils --- .../Jpeg/Components/FastFloatingPointDCT.cs | 27 +++---------------- 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index d7101abfd..afcf4158b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -229,34 +229,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components // 2 6 d.V2 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(c2, C_V_0_5411), c3, C_V_1_3065); - if (Fma.IsSupported) - { - d.V6 = Fma.MultiplySubtract(c3, C_V_0_5411, Avx.Multiply(c2, C_V_1_3065)); - } - else - { - d.V6 = Avx.Subtract(Avx.Multiply(c3, C_V_0_5411), Avx.Multiply(c2, C_V_1_3065)); - } + d.V6 = SimdUtils.HwIntrinsics.MultiplySubstract(Avx.Multiply(c2, C_V_1_3065), c3, C_V_0_5411); c3 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(t4, C_V_1_1758), t7, C_V_0_7856); - if (Fma.IsSupported) - { - c0 = Fma.MultiplySubtract(t7, C_V_1_1758, Avx.Multiply(t4, C_V_0_7856)); - } - else - { - c0 = Avx.Subtract(Avx.Multiply(t7, C_V_1_1758), Avx.Multiply(t4, C_V_0_7856)); - } + c0 = SimdUtils.HwIntrinsics.MultiplySubstract(Avx.Multiply(t4, C_V_0_7856), t7, C_V_1_1758); c2 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(t5, C_V_1_3870), C_V_0_2758, t6); - if (Fma.IsSupported) - { - c1 = Fma.MultiplySubtract(t6, C_V_1_3870, Avx.Multiply(C_V_0_2758, t5)); - } - else - { - c1 = Avx.Subtract(Avx.Multiply(t6, C_V_1_3870), Avx.Multiply(C_V_0_2758, t5)); - } + c1 = SimdUtils.HwIntrinsics.MultiplySubstract(Avx.Multiply(C_V_0_2758, t5), t6, C_V_1_3870); // 3 5 d.V3 = Avx.Subtract(c0, c2); From 0664f298d9aa8f4abbfaad608144c762a3024f3c Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Wed, 26 May 2021 13:26:31 +0300 Subject: [PATCH 37/99] Replaced bit count lookup table with lzcnt implementation, Added MinimumBitsToStore to Numerics.cs --- 
src/ImageSharp/Common/Helpers/Numerics.cs | 12 +++++++ .../Components/Encoder/HuffmanScanEncoder.cs | 34 ++----------------- 2 files changed, 14 insertions(+), 32 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index 058199301..e8ba6dde6 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -825,5 +825,17 @@ namespace SixLabors.ImageSharp return Sse2.ConvertToInt32(vsum); } #endif + + /// + /// Calculates the minimum number of bits needed to store the given value. + /// + /// Unsigned integer to store + /// Minimum number of bits needed to store given value + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int MinimumBitsToStore(uint number) + { + const int bitInUnsignedInteger = sizeof(uint) * 8; + return bitInUnsignedInteger - BitOperations.LeadingZeroCount(number); + } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 8b23211d3..0c1b4dedc 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -3,6 +3,7 @@ using System; using System.IO; +using System.Numerics; using System.Runtime.CompilerServices; using System.Threading; using SixLabors.ImageSharp.Memory; @@ -54,29 +55,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder this.target = outputStream; } - /// - /// Gets the counts the number of bits needed to hold an integer. - /// - // The C# compiler emits this as a compile-time constant embedded in the PE file. 
- // This is effectively compiled down to: return new ReadOnlySpan(&data, length) - // More details can be found: https://github.com/dotnet/roslyn/pull/24621 - private static ReadOnlySpan BitCountLut => new byte[] - { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, - }; - /// /// Encodes the image with no subsampling. 
/// @@ -394,15 +372,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder b = value - 1; } - uint bt; - if (a < 0x100) - { - bt = BitCountLut[a]; - } - else - { - bt = 8 + (uint)BitCountLut[a >> 8]; - } + uint bt = (uint)Numerics.MinimumBitsToStore((uint)a); this.EmitHuff(index, (int)((uint)(runLength << 4) | bt)); if (bt > 0) From 28ea2adb08fef8c59ad50dfc0bc1ad6b7cbf3714 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Wed, 26 May 2021 14:15:48 +0300 Subject: [PATCH 38/99] Fixed comments, removed todo, updated benchmark results --- .../Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs | 1 - tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs | 7 +++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 0c1b4dedc..28eefadc7 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -125,7 +125,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Encode420(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) 
Block8x8F b = default; Span cb = stackalloc Block8x8F[4]; Span cr = stackalloc Block8x8F[4]; diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs index 839f19e87..90b0501eb 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs @@ -16,8 +16,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg private const string TestImage = TestImages.Jpeg.BenchmarkSuite.Jpeg420Exif_MidSizeYCbCr; private const int EncodingQuality = 100; - // GDI+ uses 4:1:1 subsampling - https://stackoverflow.com/questions/745610/how-to-disable-subsampling-with-net-gdi - // ImageSharp lowest subsampling is 4:2:0 which is an okay approximation + // GDI+ uses 4:2:0 subsampling private const JpegSubsample EncodingSubsampling = JpegSubsample.Ratio420; // System.Drawing @@ -103,6 +102,6 @@ Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores | Method | Mean | Error | StdDev | Ratio | RatioSD | |---------------------- |---------:|---------:|---------:|------:|--------:| -| 'System.Drawing Jpeg' | 39.54 ms | 0.269 ms | 0.225 ms | 1.00 | 0.00 | -| 'ImageSharp Jpeg' | 47.25 ms | 0.937 ms | 1.219 ms | 1.20 | 0.02 | +| 'System.Drawing Jpeg' | 39.67 ms | 0.774 ms | 0.828 ms | 1.00 | 0.00 | +| 'ImageSharp Jpeg' | 45.39 ms | 0.415 ms | 0.346 ms | 1.14 | 0.03 | */ From d2510036a6e19180f0199d8ef37986d932c86f51 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Wed, 26 May 2021 21:53:27 +0300 Subject: [PATCH 39/99] Implemented fallback code for runtimes where BitOperations class is not supported. 
--- shared-infrastructure | 2 +- src/ImageSharp/Common/Helpers/Numerics.cs | 35 ++++++++++++++++++- .../Components/Encoder/HuffmanScanEncoder.cs | 2 +- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/shared-infrastructure b/shared-infrastructure index 48e73f455..25f565310 160000 --- a/shared-infrastructure +++ b/shared-infrastructure @@ -1 +1 @@ -Subproject commit 48e73f455f15eafefbe3175efc7433e5f277e506 +Subproject commit 25f56531057293e9f1fa8e070b2f780a0c3d7e0c diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index e8ba6dde6..37d2a943c 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -23,6 +23,25 @@ namespace SixLabors.ImageSharp private const int ShuffleAlphaControl = 0b_11_11_11_11; #endif +#if !SUPPORTS_BITOPERATIONS + private static ReadOnlySpan BitCountLut => new byte[] + { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, + }; +#endif + /// /// Determine the Greatest CommonDivisor (GCD) of two numbers. 
/// @@ -832,10 +851,24 @@ namespace SixLabors.ImageSharp /// Unsigned integer to store /// Minimum number of bits needed to store given value [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int MinimumBitsToStore(uint number) + public static int MinimumBitsToStore16(uint number) { +#if SUPPORTS_BITOPERATIONS const int bitInUnsignedInteger = sizeof(uint) * 8; return bitInUnsignedInteger - BitOperations.LeadingZeroCount(number); +#else + int bt; + if (number < 0x100) + { + bt = BitCountLut[(int)number]; + } + else + { + bt = 8 + BitCountLut[(int)(number >> 8)]; + } + + return bt; +#endif } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 28eefadc7..8f133f0de 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -371,7 +371,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder b = value - 1; } - uint bt = (uint)Numerics.MinimumBitsToStore((uint)a); + uint bt = (uint)Numerics.MinimumBitsToStore16((uint)a); this.EmitHuff(index, (int)((uint)(runLength << 4) | bt)); if (bt > 0) From ceb4fdfae098187e1cce85e3803305d65085ee0f Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 27 May 2021 14:17:14 +0300 Subject: [PATCH 40/99] Replaced unsafe Block8x8F/Vector4 -> Vector256 casts --- .../Formats/Jpeg/Components/Block8x8F.cs | 105 ++++++------------ .../Encoder/RgbToYCbCrConverterVectorized.cs | 8 +- 2 files changed, 41 insertions(+), 72 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index dbc22eaea..340d8e5c5 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -313,14 +313,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components if (Avx.IsSupported) { var valueVec 
= Vector256.Create(value); - Unsafe.As>(ref this.V0L) = Avx.Multiply(Unsafe.As>(ref this.V0L), valueVec); - Unsafe.As>(ref this.V1L) = Avx.Multiply(Unsafe.As>(ref this.V1L), valueVec); - Unsafe.As>(ref this.V2L) = Avx.Multiply(Unsafe.As>(ref this.V2L), valueVec); - Unsafe.As>(ref this.V3L) = Avx.Multiply(Unsafe.As>(ref this.V3L), valueVec); - Unsafe.As>(ref this.V4L) = Avx.Multiply(Unsafe.As>(ref this.V4L), valueVec); - Unsafe.As>(ref this.V5L) = Avx.Multiply(Unsafe.As>(ref this.V5L), valueVec); - Unsafe.As>(ref this.V6L) = Avx.Multiply(Unsafe.As>(ref this.V6L), valueVec); - Unsafe.As>(ref this.V7L) = Avx.Multiply(Unsafe.As>(ref this.V7L), valueVec); + this.V0 = Avx.Multiply(this.V0, valueVec); + this.V1 = Avx.Multiply(this.V1, valueVec); + this.V2 = Avx.Multiply(this.V2, valueVec); + this.V3 = Avx.Multiply(this.V3, valueVec); + this.V4 = Avx.Multiply(this.V4, valueVec); + this.V5 = Avx.Multiply(this.V5, valueVec); + this.V6 = Avx.Multiply(this.V6, valueVec); + this.V7 = Avx.Multiply(this.V7, valueVec); } else #endif @@ -354,45 +354,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components #if SUPPORTS_RUNTIME_INTRINSICS if (Avx.IsSupported) { - Unsafe.As>(ref this.V0L) - = Avx.Multiply( - Unsafe.As>(ref this.V0L), - Unsafe.As>(ref other.V0L)); - - Unsafe.As>(ref this.V1L) - = Avx.Multiply( - Unsafe.As>(ref this.V1L), - Unsafe.As>(ref other.V1L)); - - Unsafe.As>(ref this.V2L) - = Avx.Multiply( - Unsafe.As>(ref this.V2L), - Unsafe.As>(ref other.V2L)); - - Unsafe.As>(ref this.V3L) - = Avx.Multiply( - Unsafe.As>(ref this.V3L), - Unsafe.As>(ref other.V3L)); - - Unsafe.As>(ref this.V4L) - = Avx.Multiply( - Unsafe.As>(ref this.V4L), - Unsafe.As>(ref other.V4L)); - - Unsafe.As>(ref this.V5L) - = Avx.Multiply( - Unsafe.As>(ref this.V5L), - Unsafe.As>(ref other.V5L)); - - Unsafe.As>(ref this.V6L) - = Avx.Multiply( - Unsafe.As>(ref this.V6L), - Unsafe.As>(ref other.V6L)); - - Unsafe.As>(ref this.V7L) - = Avx.Multiply( - Unsafe.As>(ref this.V7L), - Unsafe.As>(ref 
other.V7L)); + this.V0 = Avx.Multiply(this.V0, other.V0); + this.V1 = Avx.Multiply(this.V1, other.V1); + this.V2 = Avx.Multiply(this.V2, other.V2); + this.V3 = Avx.Multiply(this.V3, other.V3); + this.V4 = Avx.Multiply(this.V4, other.V4); + this.V5 = Avx.Multiply(this.V5, other.V5); + this.V6 = Avx.Multiply(this.V6, other.V6); + this.V7 = Avx.Multiply(this.V7, other.V7); } else #endif @@ -427,14 +396,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components if (Avx.IsSupported) { var valueVec = Vector256.Create(value); - Unsafe.As>(ref this.V0L) = Avx.Add(Unsafe.As>(ref this.V0L), valueVec); - Unsafe.As>(ref this.V1L) = Avx.Add(Unsafe.As>(ref this.V1L), valueVec); - Unsafe.As>(ref this.V2L) = Avx.Add(Unsafe.As>(ref this.V2L), valueVec); - Unsafe.As>(ref this.V3L) = Avx.Add(Unsafe.As>(ref this.V3L), valueVec); - Unsafe.As>(ref this.V4L) = Avx.Add(Unsafe.As>(ref this.V4L), valueVec); - Unsafe.As>(ref this.V5L) = Avx.Add(Unsafe.As>(ref this.V5L), valueVec); - Unsafe.As>(ref this.V6L) = Avx.Add(Unsafe.As>(ref this.V6L), valueVec); - Unsafe.As>(ref this.V7L) = Avx.Add(Unsafe.As>(ref this.V7L), valueVec); + this.V0 = Avx.Add(this.V0, valueVec); + this.V1 = Avx.Add(this.V1, valueVec); + this.V2 = Avx.Add(this.V2, valueVec); + this.V3 = Avx.Add(this.V3, valueVec); + this.V4 = Avx.Add(this.V4, valueVec); + this.V5 = Avx.Add(this.V5, valueVec); + this.V6 = Avx.Add(this.V6, valueVec); + this.V7 = Avx.Add(this.V7, valueVec); } else #endif @@ -529,12 +498,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components var f2 = Vector256.Create(2f); var f025 = Vector256.Create(0.25f); Vector256 switchInnerDoubleWords = Unsafe.As>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32)); - ref Vector256 destRef = ref Unsafe.As>(ref destination); + ref Vector256 destRef = ref destination.V0; for (int i = 0; i < 2; i++) { - ref Vector256 in1 = ref Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 2 * i)); - ref Vector256 in2 = ref 
Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), (2 * i) + 1)); + ref Vector256 in1 = ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 2 * i).V0; + ref Vector256 in2 = ref Unsafe.Add(ref MemoryMarshal.GetReference(source), (2 * i) + 1).V0; for (int j = 0; j < 8; j += 2) { @@ -588,8 +557,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components var vadd = Vector256.Create(.5F); var vone = Vector256.Create(1f); - ref Vector256 aBase = ref Unsafe.AsRef(Unsafe.As>(ref a.V0L)); - ref Vector256 bBase = ref Unsafe.AsRef(Unsafe.As>(ref b.V0L)); + ref Vector256 aBase = ref a.V0; + ref Vector256 bBase = ref b.V0; ref Vector256 aEnd = ref Unsafe.Add(ref aBase, 8); do @@ -840,26 +809,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Vector256 t0 = Avx.UnpackLow(r0, r1); Vector256 t2 = Avx.UnpackLow(r2, r3); Vector256 v = Avx.Shuffle(t0, t2, 0x4E); - Unsafe.As>(ref d.V0L) = Avx.Blend(t0, v, 0xCC); - Unsafe.As>(ref d.V1L) = Avx.Blend(t2, v, 0x33); + d.V0 = Avx.Blend(t0, v, 0xCC); + d.V1 = Avx.Blend(t2, v, 0x33); Vector256 t4 = Avx.UnpackLow(r4, r5); Vector256 t6 = Avx.UnpackLow(r6, r7); v = Avx.Shuffle(t4, t6, 0x4E); - Unsafe.As>(ref d.V4L) = Avx.Blend(t4, v, 0xCC); - Unsafe.As>(ref d.V5L) = Avx.Blend(t6, v, 0x33); + d.V4 = Avx.Blend(t4, v, 0xCC); + d.V5 = Avx.Blend(t6, v, 0x33); Vector256 t1 = Avx.UnpackHigh(r0, r1); Vector256 t3 = Avx.UnpackHigh(r2, r3); v = Avx.Shuffle(t1, t3, 0x4E); - Unsafe.As>(ref d.V2L) = Avx.Blend(t1, v, 0xCC); - Unsafe.As>(ref d.V3L) = Avx.Blend(t3, v, 0x33); + d.V2 = Avx.Blend(t1, v, 0xCC); + d.V3 = Avx.Blend(t3, v, 0x33); Vector256 t5 = Avx.UnpackHigh(r4, r5); Vector256 t7 = Avx.UnpackHigh(r6, r7); v = Avx.Shuffle(t5, t7, 0x4E); - Unsafe.As>(ref d.V6L) = Avx.Blend(t5, v, 0xCC); - Unsafe.As>(ref d.V7L) = Avx.Blend(t7, v, 0x33); + d.V6 = Avx.Blend(t5, v, 0xCC); + d.V7 = Avx.Blend(t7, v, 0x33); } else #endif diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs 
b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 209cc3c6a..3ee1ca989 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System; @@ -64,9 +64,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder var zero = Vector256.Create(0).AsByte(); ref Vector256 inRef = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan)); - ref Vector256 destYRef = ref Unsafe.As>(ref yBlock); - ref Vector256 destCbRef = ref Unsafe.As>(ref cbBlock); - ref Vector256 destCrRef = ref Unsafe.As>(ref crBlock); + ref Vector256 destYRef = ref yBlock.V0; + ref Vector256 destCbRef = ref cbBlock.V0; + ref Vector256 destCrRef = ref crBlock.V0; var extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes)); var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb)); From 70474c8fae925037899579bef0a37cfe0f42a9ac Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 27 May 2021 16:02:58 +0300 Subject: [PATCH 41/99] Removed redundant enum casting durint huffman encoding --- .../Jpeg/Components/Encoder/HuffmanScanEncoder.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 8f133f0de..afd5acb4b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -257,10 +257,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int dc = (int)refTemp2[0]; // Emit the DC delta. 
- this.EmitHuffRLE((HuffIndex)((2 * (int)index) + 0), 0, dc - prevDC); + this.EmitHuffRLE((2 * (int)index) + 0, 0, dc - prevDC); // Emit the AC components. - var h = (HuffIndex)((2 * (int)index) + 1); + int h = (2 * (int)index) + 1; int runLength = 0; for (int zig = 1; zig < Block8x8F.Size; zig++) @@ -348,9 +348,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The index of the Huffman encoder /// The value to encode. [MethodImpl(InliningOptions.ShortMethod)] - private void EmitHuff(HuffIndex index, int value) + private void EmitHuff(int index, int value) { - uint x = HuffmanLut.TheHuffmanLut[(int)index].Values[value]; + uint x = HuffmanLut.TheHuffmanLut[index].Values[value]; this.Emit(x & ((1 << 24) - 1), x >> 24); } @@ -361,7 +361,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The number of copies to encode. /// The value to encode. [MethodImpl(InliningOptions.ShortMethod)] - private void EmitHuffRLE(HuffIndex index, int runLength, int value) + private void EmitHuffRLE(int index, int runLength, int value) { int a = value; int b = value; From 52e60362680ed54d7d67e7722d885af1f36ea3e6 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 27 May 2021 16:35:09 +0300 Subject: [PATCH 42/99] Reimplemented Emit methods in HuffmanScanEncoder to get rid of unreadable amount of int/uint casts --- .../Components/Encoder/HuffmanScanEncoder.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index afd5acb4b..bbc997018 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -35,12 +35,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Emmited bits 'micro buffer' before being transfered to the . 
/// - private uint accumulatedBits; + private int accumulatedBits; /// /// Number of jagged bits stored in /// - private uint bitCount; + private int bitCount; private Block8x8F temporalBlock1; private Block8x8F temporalBlock2; @@ -303,10 +303,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// The packed bits. /// The number of bits [MethodImpl(InliningOptions.ShortMethod)] - private void Emit(uint bits, uint count) + private void Emit(int bits, int count) { count += this.bitCount; - bits <<= (int)(32 - count); + bits <<= 32 - count; bits |= this.accumulatedBits; // Only write if more than 8 bits. @@ -350,7 +350,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder [MethodImpl(InliningOptions.ShortMethod)] private void EmitHuff(int index, int value) { - uint x = HuffmanLut.TheHuffmanLut[index].Values[value]; + int x = (int)HuffmanLut.TheHuffmanLut[index].Values[value]; this.Emit(x & ((1 << 24) - 1), x >> 24); } @@ -371,12 +371,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder b = value - 1; } - uint bt = (uint)Numerics.MinimumBitsToStore16((uint)a); + int bt = Numerics.MinimumBitsToStore16((uint)a); - this.EmitHuff(index, (int)((uint)(runLength << 4) | bt)); + this.EmitHuff(index, (runLength << 4) | bt); if (bt > 0) { - this.Emit((uint)b & (uint)((1 << ((int)bt)) - 1), bt); + this.Emit(b & ((1 << bt) - 1), bt); } } } From 7fb8feef50df5417bbb467bca451e43987637705 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 27 May 2021 17:10:36 +0300 Subject: [PATCH 43/99] Fixed xml docs --- shared-infrastructure | 2 +- .../Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/shared-infrastructure b/shared-infrastructure index 25f565310..1f7ee7028 160000 --- a/shared-infrastructure +++ b/shared-infrastructure @@ -1 +1 @@ -Subproject commit 25f56531057293e9f1fa8e070b2f780a0c3d7e0c +Subproject commit 1f7ee702812f3a1713ab7f749c0faae0ef139ed7 diff 
--git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index bbc997018..571a80698 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// /// This is subject to change, 1024 seems to be the best value in terms of performance. - /// expects it to be at least 8 (see comments in method body). + /// expects it to be at least 8 (see comments in method body). /// private const int EmitBufferSizeInBytes = 1024; @@ -374,10 +374,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int bt = Numerics.MinimumBitsToStore16((uint)a); this.EmitHuff(index, (runLength << 4) | bt); - if (bt > 0) - { - this.Emit(b & ((1 << bt) - 1), bt); - } + this.Emit(b & ((1 << bt) - 1), bt); } } } From d7fd9478762b59408021bdb4039beeca43502289 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 27 May 2021 18:08:59 +0300 Subject: [PATCH 44/99] Updated default quality settings in jpeg encoding benchmark --- shared-infrastructure | 2 +- tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/shared-infrastructure b/shared-infrastructure index 1f7ee7028..48e73f455 160000 --- a/shared-infrastructure +++ b/shared-infrastructure @@ -1 +1 @@ -Subproject commit 1f7ee702812f3a1713ab7f749c0faae0ef139ed7 +Subproject commit 48e73f455f15eafefbe3175efc7433e5f277e506 diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs index 90b0501eb..e22259f76 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs @@ -14,7 +14,8 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg public class EncodeJpeg { private const 
string TestImage = TestImages.Jpeg.BenchmarkSuite.Jpeg420Exif_MidSizeYCbCr; - private const int EncodingQuality = 100; + // GDI+ most likely uses 75 as default quality - https://stackoverflow.com/questions/3957477/what-quality-level-does-image-save-use-for-jpeg-files + private const int EncodingQuality = 75; // GDI+ uses 4:2:0 subsampling private const JpegSubsample EncodingSubsampling = JpegSubsample.Ratio420; From 81979e0f29ccbd425158da6c49604550e437ff62 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 27 May 2021 19:23:35 +0300 Subject: [PATCH 45/99] Improved flush logic after main encode methods run --- .../Components/Encoder/HuffmanScanEncoder.cs | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 571a80698..d69473124 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -108,9 +108,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - // Pad the last byte with 1's. - this.Emit(0x7f, 7); - this.target.Write(this.emitBuffer, 0, this.emitLen); + this.FlushInternalBuffer(); } /// @@ -181,9 +179,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - // Pad the last byte with 1's. - this.Emit(0x7f, 7); - this.target.Write(this.emitBuffer, 0, this.emitLen); + this.FlushInternalBuffer(); } /// @@ -224,9 +220,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - // Pad the last byte with 1's. 
- this.Emit(0x7f, 7); - this.target.Write(this.emitBuffer, 0, this.emitLen); + this.FlushInternalBuffer(); } /// @@ -376,5 +370,25 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder this.EmitHuff(index, (runLength << 4) | bt); this.Emit(b & ((1 << bt) - 1), bt); } + + /// + /// Writes remaining bytes from internal buffer to the target stream. + /// + /// Pads last byte with 1's if necessary + private void FlushInternalBuffer() + { + // pad last byte with 1's + int padBitsCount = 8 - (this.bitCount % 8); + if (padBitsCount != 0) + { + this.Emit(0xff, padBitsCount); + } + + // flush remaining bytes + if (this.emitLen != 0) + { + this.target.Write(this.emitBuffer, 0, this.emitLen); + } + } } } From 16842496be84834e88a90fad70b33ede1d2ecf82 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 27 May 2021 22:30:30 +0300 Subject: [PATCH 46/99] Brought back if check --- .../Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index d69473124..af8192749 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -368,7 +368,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int bt = Numerics.MinimumBitsToStore16((uint)a); this.EmitHuff(index, (runLength << 4) | bt); - this.Emit(b & ((1 << bt) - 1), bt); + if (bt > 0) + { + this.Emit(b & ((1 << bt) - 1), bt); + } } /// From 9c0999e9db43f4adca0174d266d9eb49fb077aea Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 27 May 2021 23:54:50 +0300 Subject: [PATCH 47/99] Huffman lookup tables are now integers instead of unsigned integers --- .../Formats/Jpeg/Components/Encoder/HuffmanLut.cs | 10 +++++----- .../Jpeg/Components/Encoder/HuffmanScanEncoder.cs | 2 +- 2 files changed, 6 
insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs index bc2c7634b..bc6c8c6cc 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder @@ -44,7 +44,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - this.Values = new uint[maxValue + 1]; + this.Values = new int[maxValue + 1]; int code = 0; int k = 0; @@ -54,7 +54,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int bits = (i + 1) << 24; for (int j = 0; j < spec.Count[i]; j++) { - this.Values[spec.Values[k]] = (uint)(bits | code); + this.Values[spec.Values[k]] = bits | code; code++; k++; } @@ -66,6 +66,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Gets the collection of huffman values. 
/// - public uint[] Values { get; } + public int[] Values { get; } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index af8192749..0320229a2 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -344,7 +344,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder [MethodImpl(InliningOptions.ShortMethod)] private void EmitHuff(int index, int value) { - int x = (int)HuffmanLut.TheHuffmanLut[index].Values[value]; + int x = HuffmanLut.TheHuffmanLut[index].Values[value]; this.Emit(x & ((1 << 24) - 1), x >> 24); } From 169e98bbcd15c42424f710b557a1471a88c150a5 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 28 May 2021 11:47:16 +0300 Subject: [PATCH 48/99] Simplified Block8x8F.DivideRoundAll() method --- .../Formats/Jpeg/Components/Block8x8F.cs | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 340d8e5c5..0acc6408e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -68,6 +68,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components public Vector4 V7R; #if SUPPORTS_RUNTIME_INTRINSICS + /// + /// A number of rows of 8 scalar coefficients each in + /// + public const int RowCount = 8; + [FieldOffset(0)] public Vector256 V0; [FieldOffset(32)] @@ -557,19 +562,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components var vadd = Vector256.Create(.5F); var vone = Vector256.Create(1f); - ref Vector256 aBase = ref a.V0; - ref Vector256 bBase = ref b.V0; - ref Vector256 aEnd = ref Unsafe.Add(ref aBase, 8); - - do + for (int i = 0; i < RowCount; i++) { - Vector256 voff = 
Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aBase), vone), vadd); - Unsafe.Add(ref aBase, 0) = Avx.Add(Avx.Divide(aBase, bBase), voff); - - aBase = ref Unsafe.Add(ref aBase, 1); - bBase = ref Unsafe.Add(ref bBase, 1); + ref Vector256 aRow = ref Unsafe.Add(ref a.V0, i); + ref Vector256 bRow = ref Unsafe.Add(ref b.V0, i); + Vector256 voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aRow), vone), vadd); + aRow = Avx.Add(Avx.Divide(aRow, bRow), voff); } - while (Unsafe.IsAddressLessThan(ref aBase, ref aEnd)); } else #endif From 6ac2b6660bf015ee95637c7af948bbffa18a1c4f Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 29 May 2021 14:21:49 +0300 Subject: [PATCH 49/99] Added comments to vectorized rgb->ycbcr converter for further code changes --- .../Encoder/RgbToYCbCrConverterVectorized.cs | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 3ee1ca989..a6ff21bdc 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -47,6 +47,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder }; #endif + /// + /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices + /// + /// Total size of rgb span must be 200 bytes + /// Span of rgb pixels with size of 64 + /// 8x8 destination matrix of Luminance(Y) converted data + /// 8x8 destination matrix of Chrominance(Cb) converted data + /// 8x8 destination matrix of Chrominance(Cr) converted data public static void Convert(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) { Debug.Assert(IsSupported, "AVX2 is required to run this converter"); @@ -63,7 +71,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder var f05 = Vector256.Create(0.5f); var zero = 
Vector256.Create(0).AsByte(); - ref Vector256 inRef = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan)); + ref Vector256 rgbByteSpan = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan)); ref Vector256 destYRef = ref yBlock.V0; ref Vector256 destCbRef = ref cbBlock.V0; ref Vector256 destCrRef = ref crBlock.V0; @@ -72,9 +80,31 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb)); Vector256 rgb, rg, bx; Vector256 r, g, b; + + // TODO: probably remove this after the draft + // rgbByteSpan contains 8 strides by 8 pixels each, thus 64 pixels total + // Strides are stored sequentially - one big span of 64 * 3 = 192 bytes + // Each stride has exactly 3 * 8 = 24 bytes or 3 * 8 * 8 = 192 bits + // Avx registers are 256 bits so rgb span will be loaded with extra 64 bits from the next stride: + // stride 0 0 - 192 -(+64bits)-> 256 + // stride 1 192 - 384 -(+64bits)-> 448 + // stride 2 384 - 576 -(+64bits)-> 640 + // stride 3 576 - 768 -(+64bits)-> 832 + // stride 4 768 - 960 -(+64bits)-> 1024 + // stride 5 960 - 1152 -(+64bits)-> 1216 + // stride 6 1152 - 1344 -(+64bits)-> 1408 + // stride 7 1344 - 1536 -(+64bits)-> 1600 <-- READ ACCESS VIOLATION + // + // Total size of the 64 pixel rgb span: 64 * 3 * 8 = 1536 bits, avx operations require 1600 bits + // This is not permitted - we are reading foreign memory + // That's why last stride is calculated outside of the for-loop loop with special extract shuffle mask involved + // + // Extra mask & separate stride:7 calculations can be eliminated by simply providing rgb pixel span of slightly bigger size than pixels data need: + // Total pixel data size is 192 bytes, avx registers need it to be 200 bytes + const int bytesPerRgbStride = 24; for (int i = 0; i < 7; i++) { - rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)(24 * i)).AsUInt32(), extractToLanesMask).AsByte(); + rgb = 
Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * i)).AsUInt32(), extractToLanesMask).AsByte(); rgb = Avx2.Shuffle(rgb, extractRgbMask); @@ -96,7 +126,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveLast24BytesToSeparateLanes)); - rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)160).AsUInt32(), extractToLanesMask).AsByte(); + rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)160).AsUInt32(), extractToLanesMask).AsByte(); rgb = Avx2.Shuffle(rgb, extractRgbMask); rg = Avx2.UnpackLow(rgb, zero); From a845c00f6f5698dc2ba5e11a39791d49bc443eb6 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 29 May 2021 14:47:06 +0300 Subject: [PATCH 50/99] Simplified RgbToYCbCrConverterVectorized.Convert() method --- .../Encoder/RgbToYCbCrConverterVectorized.cs | 28 +------------------ .../Encoder/YCbCrForwardConverter{TPixel}.cs | 17 +++++++---- 2 files changed, 12 insertions(+), 33 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index a6ff21bdc..62e82243c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -34,12 +34,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 }; - private static ReadOnlySpan MoveLast24BytesToSeparateLanes => new byte[] - { - 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, - 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0 - }; - private static ReadOnlySpan ExtractRgb => new byte[] { 0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF, @@ -102,7 +96,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // Extra mask & separate stride:7 
calculations can be eliminated by simply providing rgb pixel span of slightly bigger size than pixels data need: // Total pixel data size is 192 bytes, avx registers need it to be 200 bytes const int bytesPerRgbStride = 24; - for (int i = 0; i < 7; i++) + for (int i = 0; i < 8; i++) { rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * i)).AsUInt32(), extractToLanesMask).AsByte(); @@ -124,26 +118,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); } - - extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveLast24BytesToSeparateLanes)); - rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)160).AsUInt32(), extractToLanesMask).AsByte(); - rgb = Avx2.Shuffle(rgb, extractRgbMask); - - rg = Avx2.UnpackLow(rgb, zero); - bx = Avx2.UnpackHigh(rgb, zero); - - r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32()); - g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32()); - b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32()); - - // (0.299F * r) + (0.587F * g) + (0.114F * b); - Unsafe.Add(ref destYRef, 7) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); - - // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) - Unsafe.Add(ref destCbRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); - - // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) - Unsafe.Add(ref destCrRef, 7) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); #endif } } diff --git 
a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index 81e64b277..ee4626b86 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.PixelFormats; @@ -42,14 +43,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Temporal RGB block /// - private GenericBlock8x8 rgbBlock; + private Span rgbSpan; public static YCbCrForwardConverter Create() { var result = default(YCbCrForwardConverter); + + // creating rgb pixel bufferr + // TODO: this is subject to discuss + result.rgbSpan = MemoryMarshal.Cast(new byte[200].AsSpan()); + + // Avoid creating lookup tables, when vectorized converter is supported if (!RgbToYCbCrConverterVectorized.IsSupported) { - // Avoid creating lookup tables, when vectorized converter is supported result.colorTables = RgbToYCbCrConverterLut.Create(); } @@ -63,8 +69,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); - Span rgbSpan = this.rgbBlock.AsSpanUnsafe(); - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), rgbSpan); + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), this.rgbSpan); ref Block8x8F yBlock = ref this.Y; ref Block8x8F cbBlock = ref this.Cb; @@ -72,11 +77,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder if (RgbToYCbCrConverterVectorized.IsSupported) { - RgbToYCbCrConverterVectorized.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + RgbToYCbCrConverterVectorized.Convert(this.rgbSpan, ref 
yBlock, ref cbBlock, ref crBlock); } else { - this.colorTables.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } } From 2ad3ddb0364784916b95bce180618da7b279783b Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 29 May 2021 19:31:39 +0300 Subject: [PATCH 51/99] [WIP] Introduced RgbToYCbCrConverterVectorized 420 sampling --- .../Components/Encoder/HuffmanScanEncoder.cs | 21 +-- .../Encoder/RgbToYCbCrConverterVectorized.cs | 141 +++++++++++++++++- .../Encoder/YCbCrForwardConverter{TPixel}.cs | 36 +++++ 3 files changed, 184 insertions(+), 14 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 0320229a2..dc41e179e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -83,7 +83,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int x = 0; x < pixels.Width; x += 8) { - pixelConverter.Convert(frame, x, y, ref currentRows); + pixelConverter.Convert444(frame, x, y, ref currentRows); prevDCY = this.WriteBlock( QuantIndex.Luminance, @@ -123,9 +123,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Encode420(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - Block8x8F b = default; - Span cb = stackalloc Block8x8F[4]; - Span cr = stackalloc Block8x8F[4]; + Span temporalBlocks = stackalloc Block8x8F[2]; var unzig = ZigZag.CreateUnzigTable(); @@ -148,32 +146,29 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int yOff = (i & 2) * 4; currentRows.Update(pixelBuffer, y + yOff); - pixelConverter.Convert(frame, x + xOff, y + yOff, ref currentRows); - - cb[i] = pixelConverter.Cb; - cr[i] = 
pixelConverter.Cr; + pixelConverter.Convert420(frame, x + xOff, y + yOff, ref currentRows, ref temporalBlocks[0], i); prevDCY = this.WriteBlock( QuantIndex.Luminance, prevDCY, - ref pixelConverter.Y, + ref temporalBlocks[0], ref luminanceQuantTable, ref unzig); } - Block8x8F.Scale16X16To8X8(ref b, cb); + pixelConverter.ConvertCbCr(ref temporalBlocks[0], ref temporalBlocks[1]); + prevDCCb = this.WriteBlock( QuantIndex.Chrominance, prevDCCb, - ref b, + ref temporalBlocks[0], ref chrominanceQuantTable, ref unzig); - Block8x8F.Scale16X16To8X8(ref b, cr); prevDCCr = this.WriteBlock( QuantIndex.Chrominance, prevDCCr, - ref b, + ref temporalBlocks[1], ref chrominanceQuantTable, ref unzig); } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 62e82243c..9760e9e93 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -42,7 +42,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder #endif /// - /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices + /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices with 4:4:4 subsampling /// /// Total size of rgb span must be 200 bytes /// Span of rgb pixels with size of 64 @@ -120,5 +120,144 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } #endif } + + /// + /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices with 4:2:0 subsampling + /// + /// Total size of rgb span must be 200 bytes + /// Span of rgb pixels with size of 64 + /// 8x8 destination matrix of Luminance(Y) converted data + /// + /// + /// + /// + public static void Convert420(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F rAcc, ref Block8x8F gAcc, ref Block8x8F bAcc, int idx) + { + Debug.Assert(IsSupported, "AVX2 is required to run this converter"); + +#if 
SUPPORTS_RUNTIME_INTRINSICS + var f0299 = Vector256.Create(0.299f); + var f0587 = Vector256.Create(0.587f); + var f0114 = Vector256.Create(0.114f); + var fn0168736 = Vector256.Create(-0.168736f); + var fn0331264 = Vector256.Create(-0.331264f); + var f128 = Vector256.Create(128f); + var fn0418688 = Vector256.Create(-0.418688f); + var fn0081312F = Vector256.Create(-0.081312F); + var f05 = Vector256.Create(0.5f); + var zero = Vector256.Create(0).AsByte(); + + ref Vector256 rgbByteSpan = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan)); + ref Vector256 destYRef = ref yBlock.V0; + + int destOffset = (idx & 2) * 4 + (idx & 1); + + ref Vector128 destRedRef = ref Unsafe.Add(ref Unsafe.As>(ref rAcc), destOffset); + ref Vector128 destGreenRef = ref Unsafe.Add(ref Unsafe.As>(ref gAcc), destOffset); + ref Vector128 destBlueRef = ref Unsafe.Add(ref Unsafe.As>(ref bAcc), destOffset); + + var extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes)); + var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb)); + Vector256 rgb, rg, bx; + Vector256 r, g, b; + + Span> rDataLanes = stackalloc Vector256[4]; + Span> gDataLanes = stackalloc Vector256[4]; + Span> bDataLanes = stackalloc Vector256[4]; + + const int bytesPerRgbStride = 24; + for (int i = 0; i < 2; i++) + { + // each 4 lanes - [0, 1, 2, 3] & [4, 5, 6, 7] + for (int j = 0; j < 4; j++) + { + rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte(); + + rgb = Avx2.Shuffle(rgb, extractRgbMask); + + rg = Avx2.UnpackLow(rgb, zero); + bx = Avx2.UnpackHigh(rgb, zero); + + r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32()); + g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32()); + b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32()); + + // (0.299F * r) + (0.587F * g) + (0.114F * b); + Unsafe.Add(ref destYRef, i * 4 + j) = 
SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); + + rDataLanes[j] = r; + gDataLanes[j] = g; + bDataLanes[j] = b; + } + + int localDestOffset = (i & 1) * 4; + + // red + Vector256 twoLane = Scale_8x4_4x2(rDataLanes); + Unsafe.Add(ref destRedRef, localDestOffset) = twoLane.GetLower(); + Unsafe.Add(ref destRedRef, localDestOffset + 2) = twoLane.GetUpper(); + + // green + twoLane = Scale_8x4_4x2(gDataLanes); + Unsafe.Add(ref destGreenRef, localDestOffset) = twoLane.GetLower(); + Unsafe.Add(ref destGreenRef, localDestOffset + 2) = twoLane.GetUpper(); + + // blue + twoLane = Scale_8x4_4x2(bDataLanes); + Unsafe.Add(ref destBlueRef, localDestOffset) = twoLane.GetLower(); + Unsafe.Add(ref destBlueRef, localDestOffset + 2) = twoLane.GetUpper(); + } +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Scale_8x4_4x2(Span> v) + { + Vector256 switchInnerDoubleWords = Unsafe.As>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32)); + var f025 = Vector256.Create(0.25f); + + Vector256 topPairSum = SumHorizontalPairs(v[0], v[1]); + Vector256 botPairSum = SumHorizontalPairs(v[2], v[3]); + + return Avx2.PermuteVar8x32(Avx.Multiply(SumVerticalPairs(topPairSum, botPairSum), f025), switchInnerDoubleWords); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SumHorizontalPairs(Vector256 v0, Vector256 v1) + => Avx.Add(Avx.Shuffle(v0, v1, 0b10_00_10_00), Avx.Shuffle(v0, v1, 0b11_01_11_01)); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 SumVerticalPairs(Vector256 v0, Vector256 v1) + => Avx.Add(Avx.Shuffle(v0, v1, 0b01_00_01_00), Avx.Shuffle(v0, v1, 0b11_10_11_10)); + + public static void ConvertCbCr(ref Block8x8F rBlock, ref Block8x8F gBlock, ref Block8x8F bBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) + { + var fn0168736 = Vector256.Create(-0.168736f); + var fn0331264 = 
Vector256.Create(-0.331264f); + var f128 = Vector256.Create(128f); + var fn0418688 = Vector256.Create(-0.418688f); + var fn0081312F = Vector256.Create(-0.081312F); + var f05 = Vector256.Create(0.5f); + + ref Vector256 destCbRef = ref cbBlock.V0; + ref Vector256 destCrRef = ref crBlock.V0; + + ref Vector256 rRef = ref rBlock.V0; + ref Vector256 gRef = ref gBlock.V0; + ref Vector256 bRef = ref bBlock.V0; + + for (int i = 0; i < 8; i++) + { + ref Vector256 r = ref Unsafe.Add(ref rRef, i); + ref Vector256 g = ref Unsafe.Add(ref gRef, i); + ref Vector256 b = ref Unsafe.Add(ref bRef, i); + + // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) + Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); + + // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) + Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); + } + } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index ee4626b86..7bf7b8547 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -84,5 +84,41 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } + + /// + /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) + /// + public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, ref Block8x8F yBlock, int idx) + { + this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); + + 
PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), this.rgbSpan); + + ref Block8x8F rSub = ref this.Y; + ref Block8x8F gSub = ref this.Cb; + ref Block8x8F bSub = ref this.Cr; + + if (RgbToYCbCrConverterVectorized.IsSupported) + { + RgbToYCbCrConverterVectorized.Convert420(this.rgbSpan, ref yBlock, ref rSub, ref gSub, ref bSub, idx); + } + else + { + throw new NotSupportedException("This is not yet implemented"); + //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + } + } + + public void ConvertCbCr(ref Block8x8F cb, ref Block8x8F cr) + { + if (RgbToYCbCrConverterVectorized.IsSupported) + { + RgbToYCbCrConverterVectorized.ConvertCbCr(ref this.Y, ref this.Cb, ref this.Cr, ref cb, ref cr); + } + else + { + throw new NotSupportedException("This is not yet implemented"); + } + } } } From 201c5341e69fbedcbe5bc619edb81ee85419321f Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 29 May 2021 19:40:13 +0300 Subject: [PATCH 52/99] Fixed HuffmanScanEncoder error --- .../Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index dc41e179e..3d99a1b95 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -83,7 +83,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int x = 0; x < pixels.Width; x += 8) { - pixelConverter.Convert444(frame, x, y, ref currentRows); + pixelConverter.Convert(frame, x, y, ref currentRows); prevDCY = this.WriteBlock( QuantIndex.Luminance, From 8a7749644ab7b1170fc86194b400007885144678 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 29 May 2021 21:19:36 +0300 Subject: [PATCH 53/99] Imporved internal rgb -> rcbcr conversion api for 420 
subsampling --- .../Components/Encoder/HuffmanScanEncoder.cs | 10 +++--- .../Encoder/RgbToYCbCrConverterVectorized.cs | 36 ++++++++----------- .../Encoder/YCbCrForwardConverter{TPixel}.cs | 20 ++--------- 3 files changed, 21 insertions(+), 45 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 3d99a1b95..ff5ce957e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -146,29 +146,27 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int yOff = (i & 2) * 4; currentRows.Update(pixelBuffer, y + yOff); - pixelConverter.Convert420(frame, x + xOff, y + yOff, ref currentRows, ref temporalBlocks[0], i); + pixelConverter.Convert420(frame, x + xOff, y + yOff, ref currentRows, i); prevDCY = this.WriteBlock( QuantIndex.Luminance, prevDCY, - ref temporalBlocks[0], + ref pixelConverter.Y, ref luminanceQuantTable, ref unzig); } - pixelConverter.ConvertCbCr(ref temporalBlocks[0], ref temporalBlocks[1]); - prevDCCb = this.WriteBlock( QuantIndex.Chrominance, prevDCCb, - ref temporalBlocks[0], + ref pixelConverter.Cb, ref chrominanceQuantTable, ref unzig); prevDCCr = this.WriteBlock( QuantIndex.Chrominance, prevDCCr, - ref temporalBlocks[1], + ref pixelConverter.Cr, ref chrominanceQuantTable, ref unzig); } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 9760e9e93..055c7176a 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -126,12 +126,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Total size of rgb span must be 200 bytes /// Span of rgb pixels with size of 
64 - /// 8x8 destination matrix of Luminance(Y) converted data - /// - /// - /// - /// - public static void Convert420(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F rAcc, ref Block8x8F gAcc, ref Block8x8F bAcc, int idx) + /// 8x8 destination matrix of Luminance(Y) converted dataф + public static void Convert420(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock, int idx) { Debug.Assert(IsSupported, "AVX2 is required to run this converter"); @@ -152,9 +148,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int destOffset = (idx & 2) * 4 + (idx & 1); - ref Vector128 destRedRef = ref Unsafe.Add(ref Unsafe.As>(ref rAcc), destOffset); - ref Vector128 destGreenRef = ref Unsafe.Add(ref Unsafe.As>(ref gAcc), destOffset); - ref Vector128 destBlueRef = ref Unsafe.Add(ref Unsafe.As>(ref bAcc), destOffset); + ref Vector128 destCbRef = ref Unsafe.Add(ref Unsafe.As>(ref cbBlock), destOffset); + ref Vector128 destCrRef = ref Unsafe.Add(ref Unsafe.As>(ref crBlock), destOffset); var extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes)); var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb)); @@ -192,20 +187,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int localDestOffset = (i & 1) * 4; - // red - Vector256 twoLane = Scale_8x4_4x2(rDataLanes); - Unsafe.Add(ref destRedRef, localDestOffset) = twoLane.GetLower(); - Unsafe.Add(ref destRedRef, localDestOffset + 2) = twoLane.GetUpper(); + r = Scale_8x4_4x2(rDataLanes); + g = Scale_8x4_4x2(gDataLanes); + b = Scale_8x4_4x2(bDataLanes); - // green - twoLane = Scale_8x4_4x2(gDataLanes); - Unsafe.Add(ref destGreenRef, localDestOffset) = twoLane.GetLower(); - Unsafe.Add(ref destGreenRef, localDestOffset + 2) = twoLane.GetUpper(); + // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) + Vector256 cb = Avx.Add(f128, 
SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); + Unsafe.Add(ref destCbRef, localDestOffset) = cb.GetLower(); + Unsafe.Add(ref destCbRef, localDestOffset + 2) = cb.GetUpper(); - // blue - twoLane = Scale_8x4_4x2(bDataLanes); - Unsafe.Add(ref destBlueRef, localDestOffset) = twoLane.GetLower(); - Unsafe.Add(ref destBlueRef, localDestOffset + 2) = twoLane.GetUpper(); + // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) + Vector256 cr = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); + Unsafe.Add(ref destCrRef, localDestOffset) = cr.GetLower(); + Unsafe.Add(ref destCrRef, localDestOffset + 2) = cr.GetUpper(); } #endif } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index 7bf7b8547..c835e8df8 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -88,19 +88,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) /// - public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, ref Block8x8F yBlock, int idx) + public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) { this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), this.rgbSpan); - ref Block8x8F rSub = ref this.Y; - ref Block8x8F gSub = ref this.Cb; - ref Block8x8F bSub = ref this.Cr; - if (RgbToYCbCrConverterVectorized.IsSupported) { - RgbToYCbCrConverterVectorized.Convert420(this.rgbSpan, ref 
yBlock, ref rSub, ref gSub, ref bSub, idx); + RgbToYCbCrConverterVectorized.Convert420(this.rgbSpan, ref this.Y, ref this.Cb, ref this.Cr, idx); } else { @@ -108,17 +104,5 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } - - public void ConvertCbCr(ref Block8x8F cb, ref Block8x8F cr) - { - if (RgbToYCbCrConverterVectorized.IsSupported) - { - RgbToYCbCrConverterVectorized.ConvertCbCr(ref this.Y, ref this.Cb, ref this.Cr, ref cb, ref cr); - } - else - { - throw new NotSupportedException("This is not yet implemented"); - } - } } } From 052ebde3ad4abd3a68d9648a66fc4ae9be37df82 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 29 May 2021 22:31:17 +0300 Subject: [PATCH 54/99] Replaced GenericBlocl8x8 with Span in ycbcr converter --- .../Encoder/YCbCrForwardConverter{TPixel}.cs | 65 +++++++++++++++++-- 1 file changed, 60 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index c835e8df8..952dde111 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. 
using System; +using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.PixelFormats; @@ -38,7 +39,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Temporal 8x8 block to hold TPixel data /// - private GenericBlock8x8 pixelBlock; + private Span pixelSpan; /// /// Temporal RGB block @@ -52,6 +53,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // creating rgb pixel bufferr // TODO: this is subject to discuss result.rgbSpan = MemoryMarshal.Cast(new byte[200].AsSpan()); + result.pixelSpan = new TPixel[64].AsSpan(); // Avoid creating lookup tables, when vectorized converter is supported if (!RgbToYCbCrConverterVectorized.IsSupported) @@ -67,9 +69,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) { - this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); + Memory.Buffer2D buffer = frame.PixelBuffer; + LoadAndStretchEdges(currentRows, this.pixelSpan, x, buffer.Width - x, buffer.Height - y); - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), this.rgbSpan); + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); ref Block8x8F yBlock = ref this.Y; ref Block8x8F cbBlock = ref this.Cb; @@ -90,9 +93,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) { - this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); + Memory.Buffer2D buffer = frame.PixelBuffer; + LoadAndStretchEdges(currentRows, this.pixelSpan, x, Math.Min(8, buffer.Width - x), Math.Min(8, buffer.Height - y)); - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), this.rgbSpan); + 
PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); if (RgbToYCbCrConverterVectorized.IsSupported) { @@ -104,5 +108,56 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } + + // TODO: add DebugGuard checks? + private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int width, int height) + { + //Guard.MustBeBetweenOrEqualTo(width, 1, 8, nameof(width)); + //Guard.MustBeBetweenOrEqualTo(height, 1, 8, nameof(width)); + + // TODO: this is a strange check, most likely it was introduces due to 2x 8x8 blocks subsampling, should be gone after new 4:2:0 implementation + if (width <= 0 || height <= 0) + { + return; + } + + uint byteWidth = (uint)(width * Unsafe.SizeOf()); + int remainderXCount = 8 - width; + + ref byte blockStart = ref MemoryMarshal.GetReference(MemoryMarshal.Cast(dest)); + int rowSizeInBytes = 8 * Unsafe.SizeOf(); + + for (int y = 0; y < height; y++) + { + Span row = source[y]; + + ref byte s = ref Unsafe.As(ref row[startX]); + ref byte d = ref Unsafe.Add(ref blockStart, y * rowSizeInBytes); + + Unsafe.CopyBlock(ref d, ref s, byteWidth); + + ref TPixel last = ref Unsafe.Add(ref Unsafe.As(ref d), width - 1); + + for (int x = 1; x <= remainderXCount; x++) + { + Unsafe.Add(ref last, x) = last; + } + } + + int remainderYCount = 8 - height; + + if (remainderYCount == 0) + { + return; + } + + ref byte lastRowStart = ref Unsafe.Add(ref blockStart, (height - 1) * rowSizeInBytes); + + for (int y = 1; y <= remainderYCount; y++) + { + ref byte remStart = ref Unsafe.Add(ref lastRowStart, rowSizeInBytes * y); + Unsafe.CopyBlock(ref remStart, ref lastRowStart, (uint)rowSizeInBytes); + } + } } } From d50e255c854cd3c3e46238f7588f102ea3298fd7 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 00:13:06 +0300 Subject: [PATCH 55/99] [WIP] Implemented 16x8 420 subsampling convertion --- 
.../Components/Encoder/HuffmanScanEncoder.cs | 19 ++-- .../Encoder/RgbToYCbCrConverterVectorized.cs | 86 ++++++++++++++++++- .../Encoder/YCbCrForwardConverter{TPixel}.cs | 24 ++++-- 3 files changed, 110 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index ff5ce957e..f6e55153a 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -123,8 +123,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Encode420(Image pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - Span temporalBlocks = stackalloc Block8x8F[2]; - var unzig = ZigZag.CreateUnzigTable(); var pixelConverter = YCbCrForwardConverter.Create(); @@ -140,18 +138,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder cancellationToken.ThrowIfCancellationRequested(); for (int x = 0; x < pixels.Width; x += 16) { - for (int i = 0; i < 4; i++) + for(int i = 0; i < 2; i++) { - int xOff = (i & 1) * 8; - int yOff = (i & 2) * 4; - + int yOff = i * 8; currentRows.Update(pixelBuffer, y + yOff); - pixelConverter.Convert420(frame, x + xOff, y + yOff, ref currentRows, i); + pixelConverter.Convert420(frame, x, y, ref currentRows, i); + + prevDCY = this.WriteBlock( + QuantIndex.Luminance, + prevDCY, + ref pixelConverter.twinBlocksY[0], + ref luminanceQuantTable, + ref unzig); prevDCY = this.WriteBlock( QuantIndex.Luminance, prevDCY, - ref pixelConverter.Y, + ref pixelConverter.twinBlocksY[1], ref luminanceQuantTable, ref unzig); } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 055c7176a..a44b174d8 100644 --- 
a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -204,14 +204,96 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder #endif } + /// + /// Converts 16x8 Rgb24 pixels matrix to 2 Y 8x8 matrices with 4:2:0 subsampling + /// + /// + /// + /// + /// + /// + /// + public static void Convert420_16x8(ReadOnlySpan rgbSpan, Span yBlocks, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) + { + Debug.Assert(IsSupported, "AVX2 is required to run this converter"); + +#if SUPPORTS_RUNTIME_INTRINSICS + var f0299 = Vector256.Create(0.299f); + var f0587 = Vector256.Create(0.587f); + var f0114 = Vector256.Create(0.114f); + var fn0168736 = Vector256.Create(-0.168736f); + var fn0331264 = Vector256.Create(-0.331264f); + var f128 = Vector256.Create(128f); + var fn0418688 = Vector256.Create(-0.418688f); + var fn0081312F = Vector256.Create(-0.081312F); + var f05 = Vector256.Create(0.5f); + var zero = Vector256.Create(0).AsByte(); + + ref Vector256 rgbByteSpan = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan)); + + int destOffset = row * 4; + + ref Vector256 destCbRef = ref Unsafe.Add(ref Unsafe.As>(ref cbBlock), destOffset); + ref Vector256 destCrRef = ref Unsafe.Add(ref Unsafe.As>(ref crBlock), destOffset); + + var extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes)); + var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb)); + Vector256 rgb, rg, bx; + Vector256 r, g, b; + + Span> rDataLanes = stackalloc Vector256[4]; + Span> gDataLanes = stackalloc Vector256[4]; + Span> bDataLanes = stackalloc Vector256[4]; + + const int bytesPerRgbStride = 24; + for (int i = 0; i < 4; i++) + { + // 16x2 => 8x1 + for (int j = 0; j < 4; j++) + { + rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte(); + + 
rgb = Avx2.Shuffle(rgb, extractRgbMask); + + rg = Avx2.UnpackLow(rgb, zero); + bx = Avx2.UnpackHigh(rgb, zero); + + r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32()); + g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32()); + b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32()); + + int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1); + + // (0.299F * r) + (0.587F * g) + (0.114F * b); + Unsafe.Add(ref yBlocks[j & 1].V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); + + rDataLanes[j] = r; + gDataLanes[j] = g; + bDataLanes[j] = b; + } + + r = Scale_8x4_4x2(rDataLanes); + g = Scale_8x4_4x2(gDataLanes); + b = Scale_8x4_4x2(bDataLanes); + + // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) + Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); + + // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) + Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); + } +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Scale_8x4_4x2(Span> v) { Vector256 switchInnerDoubleWords = Unsafe.As>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32)); var f025 = Vector256.Create(0.25f); - Vector256 topPairSum = SumHorizontalPairs(v[0], v[1]); - Vector256 botPairSum = SumHorizontalPairs(v[2], v[3]); + Vector256 topPairSum = SumHorizontalPairs(v[0], v[2]); + Vector256 botPairSum = SumHorizontalPairs(v[1], v[3]); return Avx2.PermuteVar8x32(Avx.Multiply(SumVerticalPairs(topPairSum, botPairSum), f025), switchInnerDoubleWords); } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs 
b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index 952dde111..120b21e10 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -46,14 +46,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private Span rgbSpan; + public Span twinBlocksY; + public static YCbCrForwardConverter Create() { var result = default(YCbCrForwardConverter); // creating rgb pixel bufferr // TODO: this is subject to discuss - result.rgbSpan = MemoryMarshal.Cast(new byte[200].AsSpan()); - result.pixelSpan = new TPixel[64].AsSpan(); + const int twoBlocksByteSizeWithPadding = 384 + 8; // converter.Convert comments for +8 padding + result.rgbSpan = MemoryMarshal.Cast(new byte[twoBlocksByteSizeWithPadding].AsSpan()); + // TODO: this size should be configurable + result.pixelSpan = new TPixel[128].AsSpan(); + + result.twinBlocksY = new Block8x8F[2].AsSpan(); // Avoid creating lookup tables, when vectorized converter is supported if (!RgbToYCbCrConverterVectorized.IsSupported) @@ -70,7 +76,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) { Memory.Buffer2D buffer = frame.PixelBuffer; - LoadAndStretchEdges(currentRows, this.pixelSpan, x, buffer.Width - x, buffer.Height - y); + LoadAndStretchEdges(currentRows, this.pixelSpan, x, buffer.Width - x, buffer.Height - y, new Size(8)); PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); @@ -94,13 +100,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) { Memory.Buffer2D buffer = frame.PixelBuffer; - LoadAndStretchEdges(currentRows, this.pixelSpan, x, Math.Min(8, buffer.Width - x), Math.Min(8, buffer.Height - y)); + 
LoadAndStretchEdges(currentRows, this.pixelSpan, x, Math.Min(16, buffer.Width - x), Math.Min(8, buffer.Height - y), new Size(16, 8)); PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); if (RgbToYCbCrConverterVectorized.IsSupported) { - RgbToYCbCrConverterVectorized.Convert420(this.rgbSpan, ref this.Y, ref this.Cb, ref this.Cr, idx); + RgbToYCbCrConverterVectorized.Convert420_16x8(this.rgbSpan, this.twinBlocksY, ref this.Cb, ref this.Cr, idx); } else { @@ -110,7 +116,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } // TODO: add DebugGuard checks? - private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int width, int height) + private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int width, int height, Size areaSize) { //Guard.MustBeBetweenOrEqualTo(width, 1, 8, nameof(width)); //Guard.MustBeBetweenOrEqualTo(height, 1, 8, nameof(width)); @@ -122,10 +128,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } uint byteWidth = (uint)(width * Unsafe.SizeOf()); - int remainderXCount = 8 - width; + int remainderXCount = areaSize.Width - width; ref byte blockStart = ref MemoryMarshal.GetReference(MemoryMarshal.Cast(dest)); - int rowSizeInBytes = 8 * Unsafe.SizeOf(); + int rowSizeInBytes = areaSize.Width * Unsafe.SizeOf(); for (int y = 0; y < height; y++) { @@ -144,7 +150,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - int remainderYCount = 8 - height; + int remainderYCount = areaSize.Height - height; if (remainderYCount == 0) { From 5ed7e2d1b734c57148e1f6253aee45ae944f9c14 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 01:09:41 +0300 Subject: [PATCH 56/99] Added quality params to the jpeg encoder benchmark --- tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs 
b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs index e22259f76..e807c416b 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs @@ -13,9 +13,10 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { public class EncodeJpeg { + [Params(50, 75, 95, 100)] + public int Quality; + private const string TestImage = TestImages.Jpeg.BenchmarkSuite.Jpeg420Exif_MidSizeYCbCr; - // GDI+ most likely uses 75 as default quality - https://stackoverflow.com/questions/3957477/what-quality-level-does-image-save-use-for-jpeg-files - private const int EncodingQuality = 75; // GDI+ uses 4:2:0 subsampling private const JpegSubsample EncodingSubsampling = JpegSubsample.Ratio420; @@ -41,14 +42,14 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg this.bmpCore = Image.Load(this.bmpStream); this.bmpCore.Metadata.ExifProfile = null; - this.encoder = new JpegEncoder { Quality = EncodingQuality, Subsample = EncodingSubsampling }; + this.encoder = new JpegEncoder { Quality = Quality, Subsample = EncodingSubsampling }; this.bmpStream.Position = 0; this.bmpDrawing = SDImage.FromStream(this.bmpStream); this.jpegCodec = GetEncoder(ImageFormat.Jpeg); this.encoderParameters = new EncoderParameters(1); // Quality cast to long is necessary - this.encoderParameters.Param[0] = new EncoderParameter(Encoder.Quality, (long)EncodingQuality); + this.encoderParameters.Param[0] = new EncoderParameter(Encoder.Quality, (long)Quality); this.destinationStream = new MemoryStream(); } From d6db6b6be75dbc73dbb238cc02c6fcca31131d0c Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 01:23:09 +0300 Subject: [PATCH 57/99] Fixed compilation errors for non-intrinsic platforms --- .../Encoder/RgbToYCbCrConverterVectorized.cs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs 
b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index a44b174d8..e5fe4dea2 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -125,8 +125,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices with 4:2:0 subsampling /// /// Total size of rgb span must be 200 bytes - /// Span of rgb pixels with size of 64 - /// 8x8 destination matrix of Luminance(Y) converted dataф public static void Convert420(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock, int idx) { Debug.Assert(IsSupported, "AVX2 is required to run this converter"); @@ -207,12 +205,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Converts 16x8 Rgb24 pixels matrix to 2 Y 8x8 matrices with 4:2:0 subsampling /// - /// - /// - /// - /// - /// - /// public static void Convert420_16x8(ReadOnlySpan rgbSpan, Span yBlocks, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) { Debug.Assert(IsSupported, "AVX2 is required to run this converter"); @@ -286,6 +278,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } +#if SUPPORTS_RUNTIME_INTRINSICS [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Scale_8x4_4x2(Span> v) { @@ -335,5 +328,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); } } +#endif } } From 39569866fc022d08e431dd11c4eda5b9985b40f8 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 11:43:32 +0300 Subject: [PATCH 58/99] Added debug guard checks to LoadAndStretchEdges --- .../Encoder/YCbCrForwardConverter{TPixel}.cs | 12 +++--------- 1 file changed, 3 insertions(+), 9 
deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index 120b21e10..a059f978d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -115,17 +115,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - // TODO: add DebugGuard checks? + private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int width, int height, Size areaSize) { - //Guard.MustBeBetweenOrEqualTo(width, 1, 8, nameof(width)); - //Guard.MustBeBetweenOrEqualTo(height, 1, 8, nameof(width)); - - // TODO: this is a strange check, most likely it was introduces due to 2x 8x8 blocks subsampling, should be gone after new 4:2:0 implementation - if (width <= 0 || height <= 0) - { - return; - } + DebugGuard.MustBeBetweenOrEqualTo(width, 1, areaSize.Width, nameof(width)); + DebugGuard.MustBeBetweenOrEqualTo(height, 1, areaSize.Height, nameof(height)); uint byteWidth = (uint)(width * Unsafe.SizeOf()); int remainderXCount = areaSize.Width - width; From 0d94435d653d5dc9cf88e162182a7e3be84c15b1 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 12:55:23 +0300 Subject: [PATCH 59/99] Simplified LoadAndStretchEdges call logic --- .../Encoder/YCbCrForwardConverter{TPixel}.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index a059f978d..963e6dd9e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -76,7 +76,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void 
Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) { Memory.Buffer2D buffer = frame.PixelBuffer; - LoadAndStretchEdges(currentRows, this.pixelSpan, x, buffer.Width - x, buffer.Height - y, new Size(8)); + LoadAndStretchEdges(currentRows, this.pixelSpan, x, y, new Size(8), new Size(buffer.Width, buffer.Height)); PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); @@ -100,7 +100,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) { Memory.Buffer2D buffer = frame.PixelBuffer; - LoadAndStretchEdges(currentRows, this.pixelSpan, x, Math.Min(16, buffer.Width - x), Math.Min(8, buffer.Height - y), new Size(16, 8)); + LoadAndStretchEdges(currentRows, this.pixelSpan, x, y, new Size(16, 8), new Size(buffer.Width, buffer.Height)); PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); @@ -116,10 +116,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } - private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int width, int height, Size areaSize) + private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int startY, Size areaSize, Size borders) { - DebugGuard.MustBeBetweenOrEqualTo(width, 1, areaSize.Width, nameof(width)); - DebugGuard.MustBeBetweenOrEqualTo(height, 1, areaSize.Height, nameof(height)); + int width = Math.Min(areaSize.Width, borders.Width - startX); + int height = Math.Min(areaSize.Height, borders.Height - startY); uint byteWidth = (uint)(width * Unsafe.SizeOf()); int remainderXCount = areaSize.Width - width; From 13e7cf358fb64b18aa06ba646f0d6feedac426fc Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 15:43:48 +0300 Subject: [PATCH 60/99] Divided YCbCr converters into 444/420 subsampling categories --- .../Components/Encoder/HuffmanScanEncoder.cs | 4 +- 
.../YCbCrForwardConverter444{TPixel}.cs | 118 +++++++++++++++++ .../Encoder/YCbCrForwardConverter{TPixel}.cs | 122 ++---------------- 3 files changed, 130 insertions(+), 114 deletions(-) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index f6e55153a..4fbd9e4ec 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -71,7 +71,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // ReSharper disable once InconsistentNaming int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; - var pixelConverter = YCbCrForwardConverter.Create(); + var pixelConverter = YCbCrForwardConverter444.Create(); ImageFrame frame = pixels.Frames.RootFrame; Buffer2D pixelBuffer = frame.PixelBuffer; RowOctet currentRows = default; @@ -125,7 +125,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { var unzig = ZigZag.CreateUnzigTable(); - var pixelConverter = YCbCrForwardConverter.Create(); + var pixelConverter = YCbCrForwardConverter444.Create(); // ReSharper disable once InconsistentNaming int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs new file mode 100644 index 000000000..58bb1d559 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -0,0 +1,118 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder +{ + /// + /// On-stack worker struct to efficiently encapsulate the TPixel -> Rgb24 -> YCbCr conversion chain of 8x8 pixel blocks. + /// + /// The pixel type to work on + internal ref struct YCbCrForwardConverter444 + where TPixel : unmanaged, IPixel + { + /// + /// The Y component + /// + public Block8x8F Y; + + /// + /// The Cb component + /// + public Block8x8F Cb; + + /// + /// The Cr component + /// + public Block8x8F Cr; + + /// + /// The color conversion tables + /// + private RgbToYCbCrConverterLut colorTables; + + /// + /// Temporal 8x8 block to hold TPixel data + /// + private Span pixelSpan; + + /// + /// Temporal RGB block + /// + private Span rgbSpan; + + public Span twinBlocksY; + + public static YCbCrForwardConverter444 Create() + { + var result = default(YCbCrForwardConverter444); + + // creating rgb pixel bufferr + // TODO: this is subject to discuss + const int twoBlocksByteSizeWithPadding = 384 + 8; // converter.Convert comments for +8 padding + result.rgbSpan = MemoryMarshal.Cast(new byte[twoBlocksByteSizeWithPadding].AsSpan()); + // TODO: this size should be configurable + result.pixelSpan = new TPixel[128].AsSpan(); + + result.twinBlocksY = new Block8x8F[2].AsSpan(); + + // Avoid creating lookup tables, when vectorized converter is supported + if (!RgbToYCbCrConverterVectorized.IsSupported) + { + result.colorTables = RgbToYCbCrConverterLut.Create(); + } + + return result; + } + + /// + /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) + /// + public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) + { + Memory.Buffer2D buffer = frame.PixelBuffer; + YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, 
new Point(x, y), new Size(8), new Size(buffer.Width, buffer.Height)); + + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); + + ref Block8x8F yBlock = ref this.Y; + ref Block8x8F cbBlock = ref this.Cb; + ref Block8x8F crBlock = ref this.Cr; + + if (RgbToYCbCrConverterVectorized.IsSupported) + { + RgbToYCbCrConverterVectorized.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + } + else + { + this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + } + } + + /// + /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) + /// + public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) + { + Memory.Buffer2D buffer = frame.PixelBuffer; + YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), new Size(16, 8), new Size(buffer.Width, buffer.Height)); + + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); + + if (RgbToYCbCrConverterVectorized.IsSupported) + { + RgbToYCbCrConverterVectorized.Convert420_16x8(this.rgbSpan, this.twinBlocksY, ref this.Cb, ref this.Cr, idx); + } + else + { + throw new NotSupportedException("This is not yet implemented"); + //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index 963e6dd9e..f5ef77091 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -4,134 +4,32 @@ using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.PixelFormats; namespace 
SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { - /// - /// On-stack worker struct to efficiently encapsulate the TPixel -> Rgb24 -> YCbCr conversion chain of 8x8 pixel blocks. - /// - /// The pixel type to work on - internal ref struct YCbCrForwardConverter + internal static class YCbCrForwardConverter where TPixel : unmanaged, IPixel { - /// - /// The Y component - /// - public Block8x8F Y; - - /// - /// The Cb component - /// - public Block8x8F Cb; - - /// - /// The Cr component - /// - public Block8x8F Cr; - - /// - /// The color conversion tables - /// - private RgbToYCbCrConverterLut colorTables; - - /// - /// Temporal 8x8 block to hold TPixel data - /// - private Span pixelSpan; - - /// - /// Temporal RGB block - /// - private Span rgbSpan; - - public Span twinBlocksY; - - public static YCbCrForwardConverter Create() - { - var result = default(YCbCrForwardConverter); - - // creating rgb pixel bufferr - // TODO: this is subject to discuss - const int twoBlocksByteSizeWithPadding = 384 + 8; // converter.Convert comments for +8 padding - result.rgbSpan = MemoryMarshal.Cast(new byte[twoBlocksByteSizeWithPadding].AsSpan()); - // TODO: this size should be configurable - result.pixelSpan = new TPixel[128].AsSpan(); - - result.twinBlocksY = new Block8x8F[2].AsSpan(); - - // Avoid creating lookup tables, when vectorized converter is supported - if (!RgbToYCbCrConverterVectorized.IsSupported) - { - result.colorTables = RgbToYCbCrConverterLut.Create(); - } - - return result; - } - - /// - /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) - /// - public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) + public static void LoadAndStretchEdges(RowOctet source, Span dest, Point start, Size sampleSize, Size totalSize) { - Memory.Buffer2D buffer = frame.PixelBuffer; - LoadAndStretchEdges(currentRows, this.pixelSpan, x, y, new Size(8), new Size(buffer.Width, buffer.Height)); - - 
PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); - - ref Block8x8F yBlock = ref this.Y; - ref Block8x8F cbBlock = ref this.Cb; - ref Block8x8F crBlock = ref this.Cr; + DebugGuard.MustBeBetweenOrEqualTo(start.X, 1, totalSize.Width - 1, nameof(start.X)); + DebugGuard.MustBeBetweenOrEqualTo(start.Y, 1, totalSize.Height - 1, nameof(start.Y)); - if (RgbToYCbCrConverterVectorized.IsSupported) - { - RgbToYCbCrConverterVectorized.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); - } - else - { - this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); - } - } - - /// - /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) - /// - public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) - { - Memory.Buffer2D buffer = frame.PixelBuffer; - LoadAndStretchEdges(currentRows, this.pixelSpan, x, y, new Size(16, 8), new Size(buffer.Width, buffer.Height)); - - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); - - if (RgbToYCbCrConverterVectorized.IsSupported) - { - RgbToYCbCrConverterVectorized.Convert420_16x8(this.rgbSpan, this.twinBlocksY, ref this.Cb, ref this.Cr, idx); - } - else - { - throw new NotSupportedException("This is not yet implemented"); - //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); - } - } - - - private static void LoadAndStretchEdges(RowOctet source, Span dest, int startX, int startY, Size areaSize, Size borders) - { - int width = Math.Min(areaSize.Width, borders.Width - startX); - int height = Math.Min(areaSize.Height, borders.Height - startY); + int width = Math.Min(sampleSize.Width, totalSize.Width - start.X); + int height = Math.Min(sampleSize.Height, totalSize.Height - start.Y); uint byteWidth = (uint)(width * Unsafe.SizeOf()); - int remainderXCount = areaSize.Width - width; + int remainderXCount = sampleSize.Width - 
width; ref byte blockStart = ref MemoryMarshal.GetReference(MemoryMarshal.Cast(dest)); - int rowSizeInBytes = areaSize.Width * Unsafe.SizeOf(); + int rowSizeInBytes = sampleSize.Width * Unsafe.SizeOf(); for (int y = 0; y < height; y++) { Span row = source[y]; - ref byte s = ref Unsafe.As(ref row[startX]); + ref byte s = ref Unsafe.As(ref row[start.X]); ref byte d = ref Unsafe.Add(ref blockStart, y * rowSizeInBytes); Unsafe.CopyBlock(ref d, ref s, byteWidth); @@ -144,7 +42,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - int remainderYCount = areaSize.Height - height; + int remainderYCount = sampleSize.Height - height; if (remainderYCount == 0) { From 12b4b83cb6df5499d0b2211ae8ddf4d6b7e88363 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 18:21:36 +0300 Subject: [PATCH 61/99] 444 converter fixes --- .../YCbCrForwardConverter444{TPixel}.cs | 42 ++++++------------- 1 file changed, 12 insertions(+), 30 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs index 58bb1d559..8fef55302 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -16,6 +16,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder internal ref struct YCbCrForwardConverter444 where TPixel : unmanaged, IPixel { + // TODO: documentation + private const int RgbSpanByteSize = 8 * 8 * 3; + // TODO: documentation + private const int PixelSpanSize = 8 * 8; + + /// /// The Y component /// @@ -37,29 +43,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder private RgbToYCbCrConverterLut colorTables; /// - /// Temporal 8x8 block to hold TPixel data + /// Temporal 64-byte span to hold unconverted TPixel data /// private Span pixelSpan; /// - /// Temporal RGB block + /// 
Temporal 64-byte span to hold converted Rgb24 data /// private Span rgbSpan; - public Span twinBlocksY; - public static YCbCrForwardConverter444 Create() { var result = default(YCbCrForwardConverter444); // creating rgb pixel bufferr // TODO: this is subject to discuss - const int twoBlocksByteSizeWithPadding = 384 + 8; // converter.Convert comments for +8 padding - result.rgbSpan = MemoryMarshal.Cast(new byte[twoBlocksByteSizeWithPadding].AsSpan()); - // TODO: this size should be configurable - result.pixelSpan = new TPixel[128].AsSpan(); + // converter.Convert comments for +8 padding + result.rgbSpan = MemoryMarshal.Cast(new byte[RgbSpanByteSize + 8].AsSpan()); - result.twinBlocksY = new Block8x8F[2].AsSpan(); + // TODO: this is subject to discuss + result.pixelSpan = new TPixel[PixelSpanSize].AsSpan(); // Avoid creating lookup tables, when vectorized converter is supported if (!RgbToYCbCrConverterVectorized.IsSupported) @@ -93,26 +96,5 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } - - /// - /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) - /// - public void Convert420(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) - { - Memory.Buffer2D buffer = frame.PixelBuffer; - YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), new Size(16, 8), new Size(buffer.Width, buffer.Height)); - - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); - - if (RgbToYCbCrConverterVectorized.IsSupported) - { - RgbToYCbCrConverterVectorized.Convert420_16x8(this.rgbSpan, this.twinBlocksY, ref this.Cb, ref this.Cr, idx); - } - else - { - throw new NotSupportedException("This is not yet implemented"); - //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); - } - } } } From 
953095f1b981a59372bfc7b7c7c94ce8d4d68002 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 18:52:03 +0300 Subject: [PATCH 62/99] 420 converter fixes --- .../Components/Encoder/HuffmanScanEncoder.cs | 10 +++--- .../Encoder/RgbToYCbCrConverterVectorized.cs | 35 +++++++++++++++++-- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 4fbd9e4ec..3231c5781 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -125,7 +125,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { var unzig = ZigZag.CreateUnzigTable(); - var pixelConverter = YCbCrForwardConverter444.Create(); + var pixelConverter = YCbCrForwardConverter420.Create(); // ReSharper disable once InconsistentNaming int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; @@ -138,23 +138,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder cancellationToken.ThrowIfCancellationRequested(); for (int x = 0; x < pixels.Width; x += 16) { - for(int i = 0; i < 2; i++) + for (int i = 0; i < 2; i++) { int yOff = i * 8; currentRows.Update(pixelBuffer, y + yOff); - pixelConverter.Convert420(frame, x, y, ref currentRows, i); + pixelConverter.Convert(frame, x, y, ref currentRows, i); prevDCY = this.WriteBlock( QuantIndex.Luminance, prevDCY, - ref pixelConverter.twinBlocksY[0], + ref pixelConverter.YLeft, ref luminanceQuantTable, ref unzig); prevDCY = this.WriteBlock( QuantIndex.Luminance, prevDCY, - ref pixelConverter.twinBlocksY[1], + ref pixelConverter.YRight, ref luminanceQuantTable, ref unzig); } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index e5fe4dea2..cf4d47774 100644 --- 
a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -28,6 +28,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } #if SUPPORTS_RUNTIME_INTRINSICS + // TODO: documentation + public const int AvxRegisterRgbCompatibilityOffset = 8; + private static ReadOnlySpan MoveFirst24BytesToSeparateLanes => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, @@ -205,7 +208,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Converts 16x8 Rgb24 pixels matrix to 2 Y 8x8 matrices with 4:2:0 subsampling /// - public static void Convert420_16x8(ReadOnlySpan rgbSpan, Span yBlocks, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) + public static void Convert420_16x8(ReadOnlySpan rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) { Debug.Assert(IsSupported, "AVX2 is required to run this converter"); @@ -241,7 +244,33 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int i = 0; i < 4; i++) { // 16x2 => 8x1 - for (int j = 0; j < 4; j++) + // left 8x8 column conversions + for (int j = 0; j < 4; j += 2) + { + rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte(); + + rgb = Avx2.Shuffle(rgb, extractRgbMask); + + rg = Avx2.UnpackLow(rgb, zero); + bx = Avx2.UnpackHigh(rgb, zero); + + r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32()); + g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32()); + b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32()); + + int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1); + + // (0.299F * r) + (0.587F * g) + (0.114F * b); + Unsafe.Add(ref yBlockLeft.V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, 
b), f0587, g), f0299, r); + + rDataLanes[j] = r; + gDataLanes[j] = g; + bDataLanes[j] = b; + } + + // 16x2 => 8x1 + // right 8x8 column conversions + for (int j = 1; j < 4; j += 2) { rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte(); @@ -257,7 +286,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1); // (0.299F * r) + (0.587F * g) + (0.114F * b); - Unsafe.Add(ref yBlocks[j & 1].V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); + Unsafe.Add(ref yBlockRight.V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); rDataLanes[j] = r; gDataLanes[j] = g; From 5fc29a2e9899171878b6c703868f657c62f8e735 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 18:52:39 +0300 Subject: [PATCH 63/99] Introduced separate 420 converter --- .../YCbCrForwardConverter420{TPixel}.cs | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs new file mode 100644 index 000000000..c831b611c --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs @@ -0,0 +1,95 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder +{ + /// + /// On-stack worker struct to efficiently encapsulate the TPixel -> Rgb24 -> YCbCr conversion chain of 8x8 pixel blocks. + /// + /// The pixel type to work on + internal ref struct YCbCrForwardConverter420 + where TPixel : unmanaged, IPixel + { + /// + /// The left Y component + /// + public Block8x8F YLeft; + + /// + /// The left Y component + /// + public Block8x8F YRight; + + /// + /// The Cb component + /// + public Block8x8F Cb; + + /// + /// The Cr component + /// + public Block8x8F Cr; + + /// + /// The color conversion tables + /// + private RgbToYCbCrConverterLut colorTables; + + /// + /// Temporal 16x8 block to hold TPixel data + /// + private Span pixelSpan; + + /// + /// Temporal RGB block + /// + private Span rgbSpan; + + public static YCbCrForwardConverter420 Create() + { + var result = default(YCbCrForwardConverter420); + + // TODO: this is subject to discuss + const int twoBlocksByteSizeWithPadding = 384 + 8; // converter.Convert comments for +8 padding + result.rgbSpan = MemoryMarshal.Cast(new byte[twoBlocksByteSizeWithPadding].AsSpan()); + + // TODO: this size should be configurable + result.pixelSpan = new TPixel[128].AsSpan(); + + // Avoid creating lookup tables, when vectorized converter is supported + if (!RgbToYCbCrConverterVectorized.IsSupported) + { + result.colorTables = RgbToYCbCrConverterLut.Create(); + } + + return result; + } + + /// + /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) + /// + public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) + { + Memory.Buffer2D buffer = frame.PixelBuffer; + YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), new 
Size(16, 8), new Size(buffer.Width, buffer.Height)); + + PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); + + if (RgbToYCbCrConverterVectorized.IsSupported) + { + RgbToYCbCrConverterVectorized.Convert420_16x8(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx); + } + else + { + throw new NotSupportedException("This is not yet implemented"); + //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + } + } + } +} From cb1acaec78c92688774f7245c6ae7345a2aeda6a Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 22:30:45 +0300 Subject: [PATCH 64/99] Finished 420 subsampling converter --- .../Components/Encoder/HuffmanScanEncoder.cs | 6 +- .../Encoder/RgbToYCbCrConverterVectorized.cs | 16 +++++- .../YCbCrForwardConverter420{TPixel}.cs | 55 +++++++++++++------ 3 files changed, 53 insertions(+), 24 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 3231c5781..283a98fab 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -125,14 +125,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { var unzig = ZigZag.CreateUnzigTable(); - var pixelConverter = YCbCrForwardConverter420.Create(); - // ReSharper disable once InconsistentNaming int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; ImageFrame frame = pixels.Frames.RootFrame; Buffer2D pixelBuffer = frame.PixelBuffer; RowOctet currentRows = default; + var pixelConverter = new YCbCrForwardConverter420(frame); + for (int y = 0; y < pixels.Height; y += 16) { cancellationToken.ThrowIfCancellationRequested(); @@ -142,7 +142,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder { int yOff = i * 8; currentRows.Update(pixelBuffer, y + yOff); - pixelConverter.Convert(frame, x, y, ref 
currentRows, i); + pixelConverter.Convert(x, y, ref currentRows, i); prevDCY = this.WriteBlock( QuantIndex.Luminance, diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index cf4d47774..b9f0fa427 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -27,9 +27,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } + public static int AvxRegisterRgbCompatibilityPadding + { + get + { + if (IsSupported) + { + return 8; + } + + return 0; + } + } + #if SUPPORTS_RUNTIME_INTRINSICS - // TODO: documentation - public const int AvxRegisterRgbCompatibilityOffset = 8; private static ReadOnlySpan MoveFirst24BytesToSeparateLanes => new byte[] { @@ -306,7 +317,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder #endif } - #if SUPPORTS_RUNTIME_INTRINSICS [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Scale_8x4_4x2(Span> v) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs index c831b611c..fdb41a8e2 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs @@ -16,6 +16,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder internal ref struct YCbCrForwardConverter420 where TPixel : unmanaged, IPixel { + // TODO: docs + private const int PixelsPerSample = 16 * 8; + + // TODO: docs + private static int RgbSpanByteSize = PixelsPerSample * 3; + + // TODO: docs + private static readonly Size SampleSize = new Size(16, 8); + /// /// The left Y component /// @@ -51,35 +60,45 @@ namespace 
SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private Span rgbSpan; - public static YCbCrForwardConverter420 Create() + // TODO: docs + private Size samplingAreaSize; + + // TODO: docs + private Configuration config; + + + public YCbCrForwardConverter420(ImageFrame frame) { - var result = default(YCbCrForwardConverter420); + // matrices would be filled during convert calls + this.YLeft = default; + this.YRight = default; + this.Cb = default; + this.Cr = default; - // TODO: this is subject to discuss - const int twoBlocksByteSizeWithPadding = 384 + 8; // converter.Convert comments for +8 padding - result.rgbSpan = MemoryMarshal.Cast(new byte[twoBlocksByteSizeWithPadding].AsSpan()); + // temporal pixel buffers + this.pixelSpan = new TPixel[PixelsPerSample].AsSpan(); + this.rgbSpan = MemoryMarshal.Cast(new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxRegisterRgbCompatibilityPadding].AsSpan()); - // TODO: this size should be configurable - result.pixelSpan = new TPixel[128].AsSpan(); + // frame data + this.samplingAreaSize = new Size(frame.Width, frame.Height); + this.config = frame.GetConfiguration(); - // Avoid creating lookup tables, when vectorized converter is supported + // conversion vector fallback data if (!RgbToYCbCrConverterVectorized.IsSupported) { - result.colorTables = RgbToYCbCrConverterLut.Create(); + this.colorTables = RgbToYCbCrConverterLut.Create(); + } + else + { + this.colorTables = default; } - - return result; } - /// - /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) - /// - public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows, int idx) + public void Convert(int x, int y, ref RowOctet currentRows, int idx) { - Memory.Buffer2D buffer = frame.PixelBuffer; - YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), new Size(16, 8), new Size(buffer.Width, buffer.Height)); + 
YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), SampleSize, this.samplingAreaSize); - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); + PixelOperations.Instance.ToRgb24(this.config, this.pixelSpan, this.rgbSpan); if (RgbToYCbCrConverterVectorized.IsSupported) { From 672da457d340b2ae6df50d880dfdba0f12c9e2ec Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 30 May 2021 22:44:09 +0300 Subject: [PATCH 65/99] Finished 444 subsampling converter --- .../Components/Encoder/HuffmanScanEncoder.cs | 5 +- .../YCbCrForwardConverter444{TPixel}.cs | 53 +++++++++++++++---- 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 283a98fab..218b2b59c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -71,11 +71,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // ReSharper disable once InconsistentNaming int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; - var pixelConverter = YCbCrForwardConverter444.Create(); ImageFrame frame = pixels.Frames.RootFrame; Buffer2D pixelBuffer = frame.PixelBuffer; RowOctet currentRows = default; + var pixelConverter = new YCbCrForwardConverter444(frame); + for (int y = 0; y < pixels.Height; y += 8) { cancellationToken.ThrowIfCancellationRequested(); @@ -83,7 +84,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int x = 0; x < pixels.Width; x += 8) { - pixelConverter.Convert(frame, x, y, ref currentRows); + pixelConverter.Convert(x, y, ref currentRows); prevDCY = this.WriteBlock( QuantIndex.Luminance, diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs 
b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs index 8fef55302..27f7e3ae9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -16,10 +16,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder internal ref struct YCbCrForwardConverter444 where TPixel : unmanaged, IPixel { - // TODO: documentation - private const int RgbSpanByteSize = 8 * 8 * 3; - // TODO: documentation - private const int PixelSpanSize = 8 * 8; + // TODO: docs + private const int PixelsPerSample = 8 * 8; + + // TODO: docs + private const int RgbSpanByteSize = PixelsPerSample * 3; + + // TODO: docs + private static readonly Size SampleSize = new Size(8, 8); /// @@ -52,6 +56,38 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private Span rgbSpan; + // TODO: docs + private Size samplingAreaSize; + + // TODO: docs + private readonly Configuration config; + + public YCbCrForwardConverter444(ImageFrame frame) + { + // matrices would be filled during convert calls + this.Y = default; + this.Cb = default; + this.Cr = default; + + // temporal pixel buffers + this.pixelSpan = new TPixel[PixelsPerSample].AsSpan(); + this.rgbSpan = MemoryMarshal.Cast(new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxRegisterRgbCompatibilityPadding].AsSpan()); + + // frame data + this.samplingAreaSize = new Size(frame.Width, frame.Height); + this.config = frame.GetConfiguration(); + + // conversion vector fallback data + if (!RgbToYCbCrConverterVectorized.IsSupported) + { + this.colorTables = RgbToYCbCrConverterLut.Create(); + } + else + { + this.colorTables = default; + } + } + public static YCbCrForwardConverter444 Create() { var result = default(YCbCrForwardConverter444); @@ -62,7 +98,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder result.rgbSpan = MemoryMarshal.Cast(new 
byte[RgbSpanByteSize + 8].AsSpan()); // TODO: this is subject to discuss - result.pixelSpan = new TPixel[PixelSpanSize].AsSpan(); + result.pixelSpan = new TPixel[PixelsPerSample].AsSpan(); // Avoid creating lookup tables, when vectorized converter is supported if (!RgbToYCbCrConverterVectorized.IsSupported) @@ -76,12 +112,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) /// - public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) + public void Convert(int x, int y, ref RowOctet currentRows) { - Memory.Buffer2D buffer = frame.PixelBuffer; - YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), new Size(8), new Size(buffer.Width, buffer.Height)); + YCbCrForwardConverter.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), SampleSize, this.samplingAreaSize); - PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelSpan, this.rgbSpan); + PixelOperations.Instance.ToRgb24(this.config, this.pixelSpan, this.rgbSpan); ref Block8x8F yBlock = ref this.Y; ref Block8x8F cbBlock = ref this.Cb; From 1d54702dc1ae9b65cb471eeeaa331ded112479cc Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 2 Jun 2021 17:24:56 +0100 Subject: [PATCH 66/99] Update shared-infrastructure --- shared-infrastructure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared-infrastructure b/shared-infrastructure index 48e73f455..1f7ee7028 160000 --- a/shared-infrastructure +++ b/shared-infrastructure @@ -1 +1 @@ -Subproject commit 48e73f455f15eafefbe3175efc7433e5f277e506 +Subproject commit 1f7ee702812f3a1713ab7f749c0faae0ef139ed7 From 5ea8da6c979f4e5a8dc2ba7131e0624ec1535ca1 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 2 Jun 2021 18:23:09 +0100 Subject: [PATCH 67/99] Fix BitOperations --- src/ImageSharp/Common/Helpers/Numerics.cs | 33 
++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index e8ba6dde6..6bf06150b 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -23,6 +23,28 @@ namespace SixLabors.ImageSharp private const int ShuffleAlphaControl = 0b_11_11_11_11; #endif +#if !SUPPORTS_BITOPERATIONS + /// + /// Gets the counts the number of bits needed to hold an integer. + /// + private static ReadOnlySpan BitCountLut => new byte[] + { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, + }; +#endif + /// /// Determine the Greatest CommonDivisor (GCD) of two numbers. /// @@ -756,7 +778,7 @@ namespace SixLabors.ImageSharp /// widening them to 32-bit integers and performing four additions. 
/// /// - /// byte(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + /// byte(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) /// is widened and added onto as such: /// /// accumulator += i32(1, 2, 3, 4); @@ -834,8 +856,17 @@ namespace SixLabors.ImageSharp [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int MinimumBitsToStore(uint number) { +#if !SUPPORTS_BITOPERATIONS + if (number < 0x100) + { + return BitCountLut[(int)number]; + } + + return 8 + BitCountLut[(int)number >> 8]; +#else const int bitInUnsignedInteger = sizeof(uint) * 8; return bitInUnsignedInteger - BitOperations.LeadingZeroCount(number); +#endif } } } From de176b699e377ce4da7f005c66a9351d77b8eed1 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 3 Jun 2021 17:35:23 +0300 Subject: [PATCH 68/99] Initial 420 subsampling lut conversion implementation --- .../Encoder/RgbToYCbCrConverterLut.cs | 90 +++++++++++++++++++ .../YCbCrForwardConverter420{TPixel}.cs | 3 +- 2 files changed, 91 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index 1ceea1e08..635e571b7 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -115,6 +115,34 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder crResult[i] = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ConvertPixelInto( + int r, + int g, + int b, + ref Block8x8F yResult, + int i) + { + // float y = (0.299F * r) + (0.587F * g) + (0.114F * b); + yResult[i] = (this.YRTable[r] + this.YGTable[g] + this.YBTable[b]) >> ScaleBits; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ConvertPixelInto( + int r, + int g, + int b, + ref Block8x8F cbResult, + ref 
Block8x8F crResult, + int i) + { + // float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)); + cbResult[i] = (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits; + + // float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)); + crResult[i] = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; + } + public void Convert(Span rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) { ref Rgb24 rgbStart = ref rgbSpan[0]; @@ -134,6 +162,68 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } + public void Convert(Span rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) + { + ref Rgb24 rgbStart = ref rgbSpan[0]; + for (int i = 0; i < 8; i += 2) + { + Span r = stackalloc int[8]; + Span g = stackalloc int[8]; + Span b = stackalloc int[8]; + + for (int j = 0; j < 2; j++) + { + // left + ref Rgb24 stride = ref Unsafe.Add(ref rgbStart, (i + j) * 16); + for (int k = 0; k < 8; k += 2) + { + int r0 = Unsafe.Add(ref stride, k).R; + int g0 = Unsafe.Add(ref stride, k).G; + int b0 = Unsafe.Add(ref stride, k).B; + this.ConvertPixelInto(r0, g0, b0, ref yBlockLeft, (i + j) * 8 + k); + + int r1 = Unsafe.Add(ref stride, k + 1).R; + int g1 = Unsafe.Add(ref stride, k + 1).G; + int b1 = Unsafe.Add(ref stride, k + 1).B; + this.ConvertPixelInto(r1, g1, b1, ref yBlockLeft, (i + j) * 8 + k + 1); + + int idx = k / 2; + r[idx] += r0 + r1; + g[idx] += g0 + g1; + b[idx] += b0 + b1; + } + + // right + stride = ref Unsafe.Add(ref stride, 8); + for (int k = 0; k < 8; k += 2) + { + int r0 = Unsafe.Add(ref stride, k).R; + int g0 = Unsafe.Add(ref stride, k).G; + int b0 = Unsafe.Add(ref stride, k).B; + this.ConvertPixelInto(r0, g0, b0, ref yBlockRight, (i + j) * 8 + k); + + int r1 = Unsafe.Add(ref stride, k + 1).R; + int g1 = Unsafe.Add(ref stride, k + 1).G; + int b1 = Unsafe.Add(ref stride, k + 1).B; + this.ConvertPixelInto(r1, g1, b1, ref 
yBlockRight, (i + j) * 8 + k + 1); + + int idx = 4 + (k / 2); + r[idx] += r0 + r1; + g[idx] += g0 + g1; + b[idx] += b0 + b1; + } + } + + int writeIdx = + row * Block8x8F.Size / 2 // upper or lower part + + (i / 2) * 8; // which row + for (int j = 0; j < 8; j++) + { + this.ConvertPixelInto(r[j] / 4, g[j] / 4, b[j] / 4, ref cbBlock, ref crBlock, writeIdx + j); + } + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int Fix(float x) => (int)((x * (1L << ScaleBits)) + 0.5F); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs index fdb41a8e2..2e8433cdc 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs @@ -106,8 +106,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } else { - throw new NotSupportedException("This is not yet implemented"); - //this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + this.colorTables.Convert(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx); } } } From 7896e24606ba15500e43bcdaa856cebee9e42b67 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 4 Jun 2021 13:47:10 +0300 Subject: [PATCH 69/99] Improved non-simd ycbcr lut converter code --- .../Encoder/RgbToYCbCrConverterLut.cs | 37 ++++++++----------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index 635e571b7..06e8f26b6 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -177,40 +177,33 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref 
Rgb24 stride = ref Unsafe.Add(ref rgbStart, (i + j) * 16); for (int k = 0; k < 8; k += 2) { - int r0 = Unsafe.Add(ref stride, k).R; - int g0 = Unsafe.Add(ref stride, k).G; - int b0 = Unsafe.Add(ref stride, k).B; - this.ConvertPixelInto(r0, g0, b0, ref yBlockLeft, (i + j) * 8 + k); + Rgb24 px0 = Unsafe.Add(ref stride, k); + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockLeft, (i + j) * 8 + k); - int r1 = Unsafe.Add(ref stride, k + 1).R; - int g1 = Unsafe.Add(ref stride, k + 1).G; - int b1 = Unsafe.Add(ref stride, k + 1).B; - this.ConvertPixelInto(r1, g1, b1, ref yBlockLeft, (i + j) * 8 + k + 1); + Rgb24 px1 = Unsafe.Add(ref stride, k + 1); + this.ConvertPixelInto(px1.R, px1.G, px1.B, ref yBlockLeft, (i + j) * 8 + k + 1); int idx = k / 2; - r[idx] += r0 + r1; - g[idx] += g0 + g1; - b[idx] += b0 + b1; + r[idx] += px0.R + px1.R; + g[idx] += px0.G + px1.G; + b[idx] += px0.B + px1.B; } // right stride = ref Unsafe.Add(ref stride, 8); for (int k = 0; k < 8; k += 2) { - int r0 = Unsafe.Add(ref stride, k).R; - int g0 = Unsafe.Add(ref stride, k).G; - int b0 = Unsafe.Add(ref stride, k).B; - this.ConvertPixelInto(r0, g0, b0, ref yBlockRight, (i + j) * 8 + k); + Rgb24 px0 = Unsafe.Add(ref stride, k); + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRight, (i + j) * 8 + k); - int r1 = Unsafe.Add(ref stride, k + 1).R; - int g1 = Unsafe.Add(ref stride, k + 1).G; - int b1 = Unsafe.Add(ref stride, k + 1).B; - this.ConvertPixelInto(r1, g1, b1, ref yBlockRight, (i + j) * 8 + k + 1); + Rgb24 px1 = Unsafe.Add(ref stride, k + 1); + this.ConvertPixelInto(px1.R, px1.G, px1.B, ref yBlockRight, (i + j) * 8 + k + 1); int idx = 4 + (k / 2); - r[idx] += r0 + r1; - g[idx] += g0 + g1; - b[idx] += b0 + b1; + r[idx] += px0.R + px1.R; + g[idx] += px0.G + px1.G; + b[idx] += px0.B + px1.B; + } } From 2e25a3ee34ca3c21c9ade0a5c3c11131167a319b Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 4 Jun 2021 14:16:32 +0300 Subject: [PATCH 70/99] Optimized non-simd ycbcr lut converter code 
--- .../Encoder/RgbToYCbCrConverterLut.cs | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index 06e8f26b6..e26e73044 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -167,9 +167,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref Rgb24 rgbStart = ref rgbSpan[0]; for (int i = 0; i < 8; i += 2) { - Span r = stackalloc int[8]; - Span g = stackalloc int[8]; - Span b = stackalloc int[8]; + Span rgbTriplets = stackalloc int[24]; // 8 pixels by 3 integers for (int j = 0; j < 2; j++) { @@ -183,10 +181,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder Rgb24 px1 = Unsafe.Add(ref stride, k + 1); this.ConvertPixelInto(px1.R, px1.G, px1.B, ref yBlockLeft, (i + j) * 8 + k + 1); - int idx = k / 2; - r[idx] += px0.R + px1.R; - g[idx] += px0.G + px1.G; - b[idx] += px0.B + px1.B; + int idx = 3 * (k / 2); + rgbTriplets[idx] += px0.R + px1.R; + rgbTriplets[idx + 1] += px0.G + px1.G; + rgbTriplets[idx + 2] += px0.B + px1.B; } // right @@ -199,10 +197,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder Rgb24 px1 = Unsafe.Add(ref stride, k + 1); this.ConvertPixelInto(px1.R, px1.G, px1.B, ref yBlockRight, (i + j) * 8 + k + 1); - int idx = 4 + (k / 2); - r[idx] += px0.R + px1.R; - g[idx] += px0.G + px1.G; - b[idx] += px0.B + px1.B; + int idx = 3 * (4 + (k / 2)); + rgbTriplets[idx] += px0.R + px1.R; + rgbTriplets[idx + 1] += px0.G + px1.G; + rgbTriplets[idx + 2] += px0.B + px1.B; } } @@ -212,7 +210,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder + (i / 2) * 8; // which row for (int j = 0; j < 8; j++) { - this.ConvertPixelInto(r[j] / 4, g[j] / 4, b[j] / 4, ref cbBlock, ref crBlock, writeIdx + j); + int idx = j * 3; 
+ this.ConvertPixelInto(rgbTriplets[idx] / 4, rgbTriplets[idx + 1] / 4, rgbTriplets[idx + 2] / 4, ref cbBlock, ref crBlock, writeIdx + j); } } } From 44bae0b79e8ee83dbbf5533c32f2eb34a33de490 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 4 Jun 2021 16:50:07 +0300 Subject: [PATCH 71/99] Made non-simd ycbcr lut converter code more readable --- .../Encoder/RgbToYCbCrConverterLut.cs | 54 ++++++++++++------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index e26e73044..18f5ee0e7 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -128,21 +128,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void ConvertPixelInto( - int r, - int g, - int b, - ref Block8x8F cbResult, - ref Block8x8F crResult, - int i) + private void ConvertPixelInto(int r, int g, int b, ref float yResult) => + // float y = (0.299F * r) + (0.587F * g) + (0.114F * b); + yResult = (this.YRTable[r] + this.YGTable[g] + this.YBTable[b]) >> ScaleBits; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ConvertPixelInto(int r, int g, int b, ref float cbResult, ref float crResult) { // float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)); - cbResult[i] = (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits; + cbResult = (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits; // float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)); - crResult[i] = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; + crResult = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; } + public void Convert(Span rgbSpan, ref Block8x8F yBlock, ref 
Block8x8F cbBlock, ref Block8x8F crBlock) { ref Rgb24 rgbStart = ref rgbSpan[0]; @@ -164,10 +164,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder public void Convert(Span rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) { + ref float yBlockLeftRef = ref Unsafe.As(ref yBlockLeft); + ref float yBlockRightRef = ref Unsafe.As(ref yBlockRight); + + // 0-31 or 32-63 + // upper or lower part + int chromaWriteOffset = row * Block8x8F.Size / 2; + ref float cbBlockRef = ref Unsafe.Add(ref Unsafe.As(ref cbBlock), chromaWriteOffset); + ref float crBlockRef = ref Unsafe.Add(ref Unsafe.As(ref crBlock), chromaWriteOffset); + ref Rgb24 rgbStart = ref rgbSpan[0]; + for (int i = 0; i < 8; i += 2) { - Span rgbTriplets = stackalloc int[24]; // 8 pixels by 3 integers + // 8 pixels by 3 integers + Span rgbTriplets = stackalloc int[24]; for (int j = 0; j < 2; j++) { @@ -175,11 +186,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref Rgb24 stride = ref Unsafe.Add(ref rgbStart, (i + j) * 16); for (int k = 0; k < 8; k += 2) { + ref float yBlockRef = ref Unsafe.Add(ref yBlockLeftRef, (i + j) * 8 + k); + Rgb24 px0 = Unsafe.Add(ref stride, k); - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockLeft, (i + j) * 8 + k); + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); Rgb24 px1 = Unsafe.Add(ref stride, k + 1); - this.ConvertPixelInto(px1.R, px1.G, px1.B, ref yBlockLeft, (i + j) * 8 + k + 1); + this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); int idx = 3 * (k / 2); rgbTriplets[idx] += px0.R + px1.R; @@ -191,11 +204,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder stride = ref Unsafe.Add(ref stride, 8); for (int k = 0; k < 8; k += 2) { + ref float yBlockRef = ref Unsafe.Add(ref yBlockRightRef, (i + j) * 8 + k); + Rgb24 px0 = Unsafe.Add(ref stride, k); - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRight, (i + j) * 8 + 
k); + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); Rgb24 px1 = Unsafe.Add(ref stride, k + 1); - this.ConvertPixelInto(px1.R, px1.G, px1.B, ref yBlockRight, (i + j) * 8 + k + 1); + this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); int idx = 3 * (4 + (k / 2)); rgbTriplets[idx] += px0.R + px1.R; @@ -205,13 +220,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - int writeIdx = - row * Block8x8F.Size / 2 // upper or lower part - + (i / 2) * 8; // which row + int writeIdx = 8 * (i / 2); for (int j = 0; j < 8; j++) { int idx = j * 3; - this.ConvertPixelInto(rgbTriplets[idx] / 4, rgbTriplets[idx + 1] / 4, rgbTriplets[idx + 2] / 4, ref cbBlock, ref crBlock, writeIdx + j); + this.ConvertPixelInto( + rgbTriplets[idx] / 4, // r + rgbTriplets[idx + 1] / 4, // g + rgbTriplets[idx + 2] / 4, // b + ref Unsafe.Add(ref cbBlockRef, writeIdx + j), + ref Unsafe.Add(ref crBlockRef, writeIdx + j)); } } } From 078703b595ecf204db96c34220b1d23ca9499b8a Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 4 Jun 2021 17:28:57 +0300 Subject: [PATCH 72/99] Added docs, renamed LuT converter for 444 and 420 subsampling methods, added debug guards --- .../Encoder/RgbToYCbCrConverterLut.cs | 32 +++++++++++++++---- .../YCbCrForwardConverter420{TPixel}.cs | 2 +- .../YCbCrForwardConverter444{TPixel}.cs | 2 +- .../Encoder/YCbCrForwardConverterBenchmark.cs | 2 +- .../Formats/Jpg/RgbToYCbCrConverterTests.cs | 2 +- 5 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index 18f5ee0e7..3706b8062 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -142,8 +142,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder crResult = (this.CbBTable[r] + 
this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; } - - public void Convert(Span rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) + /// + /// Converts Rgb24 pixels into YCbCr color space with 4:4:4 subsampling sampling of luminance and chroma. + /// + /// Span of Rgb24 pixel data + /// Resulting Y values block + /// Resulting Cb values block + /// Resulting Cr values block + public void Convert444(Span rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) { ref Rgb24 rgbStart = ref rgbSpan[0]; @@ -162,8 +168,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - public void Convert(Span rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) + /// + /// Converts Rgb24 pixels into YCbCr color space with 4:2:0 subsampling of luminance and chroma. + /// + /// Calculates 2 out of 4 luminance blocks and half of chroma blocks. This method must be called twice per 4x 8x8 DCT blocks with different row param. 
+ /// Span of Rgb24 pixel data + /// First or "left" resulting Y block + /// Second or "right" resulting Y block + /// Resulting Cb values block + /// Resulting Cr values block + /// Row index of the 16x16 block, 0 or 1 + public void Convert420(Span rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) { + DebugGuard.MustBeBetweenOrEqualTo(row, 0, 1, nameof(row)); + ref float yBlockLeftRef = ref Unsafe.As(ref yBlockLeft); ref float yBlockRightRef = ref Unsafe.As(ref yBlockRight); @@ -189,9 +207,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref float yBlockRef = ref Unsafe.Add(ref yBlockLeftRef, (i + j) * 8 + k); Rgb24 px0 = Unsafe.Add(ref stride, k); - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); - Rgb24 px1 = Unsafe.Add(ref stride, k + 1); + + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); int idx = 3 * (k / 2); @@ -207,9 +225,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder ref float yBlockRef = ref Unsafe.Add(ref yBlockRightRef, (i + j) * 8 + k); Rgb24 px0 = Unsafe.Add(ref stride, k); - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); - Rgb24 px1 = Unsafe.Add(ref stride, k + 1); + + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); int idx = 3 * (4 + (k / 2)); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs index 2e8433cdc..e0e7854b0 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs @@ -106,7 +106,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } else { - 
this.colorTables.Convert(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx); + this.colorTables.Convert420(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx); } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs index 27f7e3ae9..f3ae33934 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -128,7 +128,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } else { - this.colorTables.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + this.colorTables.Convert444(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } } diff --git a/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs b/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs index 1db407293..60a585384 100644 --- a/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs +++ b/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs @@ -37,7 +37,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Format.Jpeg.Components.Encoder Block8x8F cb = default; Block8x8F cr = default; - this.converter.Convert(this.data.AsSpan(), ref y, ref cb, ref cr); + this.converter.Convert444(this.data.AsSpan(), ref y, ref cb, ref cr); } [Benchmark] diff --git a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs index 9a6fc8d6f..c605a6cf8 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs @@ -32,7 +32,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Block8x8F cb = 
default; Block8x8F cr = default; - target.Convert(data.AsSpan(), ref y, ref cb, ref cr); + target.Convert444(data.AsSpan(), ref y, ref cb, ref cr); Verify(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(1F)); } From da1b85bee38b4e4ceded1c57d25ac13a2a0e8f22 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Fri, 4 Jun 2021 18:04:58 +0300 Subject: [PATCH 73/99] Final cleanup of the non-simd 420 rgb -> ycbcr conversion code --- .../Encoder/RgbToYCbCrConverterLut.cs | 62 +++++++++---------- 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index 3706b8062..7681063ee 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -200,45 +200,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int j = 0; j < 2; j++) { - // left + int yBlockWriteOffset = (i + j) * 8; ref Rgb24 stride = ref Unsafe.Add(ref rgbStart, (i + j) * 16); - for (int k = 0; k < 8; k += 2) - { - ref float yBlockRef = ref Unsafe.Add(ref yBlockLeftRef, (i + j) * 8 + k); - - Rgb24 px0 = Unsafe.Add(ref stride, k); - Rgb24 px1 = Unsafe.Add(ref stride, k + 1); - - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); - this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); - int idx = 3 * (k / 2); - rgbTriplets[idx] += px0.R + px1.R; - rgbTriplets[idx + 1] += px0.G + px1.G; - rgbTriplets[idx + 2] += px0.B + px1.B; - } + // left + this.ConvertChunk420(ref stride, ref Unsafe.Add(ref yBlockLeftRef, yBlockWriteOffset), rgbTriplets); // right - stride = ref Unsafe.Add(ref stride, 8); - for (int k = 0; k < 8; k += 2) - { - ref float yBlockRef = ref Unsafe.Add(ref yBlockRightRef, (i + j) * 8 + k); - - Rgb24 px0 = Unsafe.Add(ref stride, k); - Rgb24 px1 = Unsafe.Add(ref stride, k + 1); - - 
this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); - this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); - - int idx = 3 * (4 + (k / 2)); - rgbTriplets[idx] += px0.R + px1.R; - rgbTriplets[idx + 1] += px0.G + px1.G; - rgbTriplets[idx + 2] += px0.B + px1.B; - - } + this.ConvertChunk420(ref Unsafe.Add(ref stride, 8), ref Unsafe.Add(ref yBlockRightRef, yBlockWriteOffset), rgbTriplets.Slice(12)); } int writeIdx = 8 * (i / 2); + ref float cbWriteRef = ref Unsafe.Add(ref cbBlockRef, writeIdx); + ref float crWriteRef = ref Unsafe.Add(ref crBlockRef, writeIdx); for (int j = 0; j < 8; j++) { int idx = j * 3; @@ -246,12 +220,32 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder rgbTriplets[idx] / 4, // r rgbTriplets[idx + 1] / 4, // g rgbTriplets[idx + 2] / 4, // b - ref Unsafe.Add(ref cbBlockRef, writeIdx + j), - ref Unsafe.Add(ref crBlockRef, writeIdx + j)); + ref Unsafe.Add(ref cbWriteRef, j), + ref Unsafe.Add(ref crWriteRef, j)); } } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ConvertChunk420(ref Rgb24 stride, ref float yBlock, Span chromaRgbTriplet) + { + for (int k = 0; k < 8; k += 2) + { + ref float yBlockRef = ref Unsafe.Add(ref yBlock, k); + + Rgb24 px0 = Unsafe.Add(ref stride, k); + Rgb24 px1 = Unsafe.Add(ref stride, k + 1); + + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); + this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); + + int idx = 3 * (k / 2); + chromaRgbTriplet[idx] += px0.R + px1.R; + chromaRgbTriplet[idx + 1] += px0.G + px1.G; + chromaRgbTriplet[idx + 2] += px0.B + px1.B; + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int Fix(float x) => (int)((x * (1L << ScaleBits)) + 0.5F); From 7135fc70963dd4c291375db79bd43fd8fb625f61 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 5 Jun 2021 03:08:13 +0300 Subject: [PATCH 74/99] Renamed MinimumBitsToStore16 method as it only works with up to 16 bits values --- 
src/ImageSharp/Common/Helpers/Numerics.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index 6bf06150b..ef457f7ce 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -854,7 +854,7 @@ namespace SixLabors.ImageSharp /// Unsigned integer to store /// Minimum number of bits needed to store given value [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int MinimumBitsToStore(uint number) + public static int MinimumBitsToStore16(uint number) { #if !SUPPORTS_BITOPERATIONS if (number < 0x100) From 743e34c489d68543f60935484aa0e7f1a847e0cd Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 5 Jun 2021 03:49:14 +0300 Subject: [PATCH 75/99] Fixed stream flush for jpeg encoder --- .../Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index 218b2b59c..fdeecc9d8 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -381,7 +381,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder int padBitsCount = 8 - (this.bitCount % 8); if (padBitsCount != 0) { - this.Emit(0xff, padBitsCount); + this.Emit((1 << padBitsCount) - 1, padBitsCount); } // flush remaining bytes From 01f44a839ed0a3f3ec5362f0f661a80611ed6ea1 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 5 Jun 2021 20:05:50 +0300 Subject: [PATCH 76/99] Renamed vectorized rgb -> ycbcr converter for 444 subsampling --- .../Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs | 2 +- .../Components/Encoder/YCbCrForwardConverter444{TPixel}.cs | 2 +- .../Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs | 2 +- 
.../ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index b9f0fa427..05a1b111f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -63,7 +63,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// 8x8 destination matrix of Luminance(Y) converted data /// 8x8 destination matrix of Chrominance(Cb) converted data /// 8x8 destination matrix of Chrominance(Cr) converted data - public static void Convert(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) + public static void Convert444(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) { Debug.Assert(IsSupported, "AVX2 is required to run this converter"); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs index f3ae33934..0b7438725 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -124,7 +124,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder if (RgbToYCbCrConverterVectorized.IsSupported) { - RgbToYCbCrConverterVectorized.Convert(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + RgbToYCbCrConverterVectorized.Convert444(this.rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } else { diff --git a/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs b/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs 
index 60a585384..9aafb6936 100644 --- a/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs +++ b/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs @@ -49,7 +49,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Format.Jpeg.Components.Encoder if (RgbToYCbCrConverterVectorized.IsSupported) { - RgbToYCbCrConverterVectorized.Convert(this.data.AsSpan(), ref y, ref cb, ref cr); + RgbToYCbCrConverterVectorized.Convert444(this.data.AsSpan(), ref y, ref cb, ref cr); } } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs index c605a6cf8..5f9d3f26d 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System; @@ -52,7 +52,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Block8x8F cb = default; Block8x8F cr = default; - RgbToYCbCrConverterVectorized.Convert(data.AsSpan(), ref y, ref cb, ref cr); + RgbToYCbCrConverterVectorized.Convert444(data.AsSpan(), ref y, ref cb, ref cr); Verify(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(0.0001F)); } From fcf202a913a3c623c877363cb4144a5b050dd15f Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sat, 5 Jun 2021 23:00:19 +0300 Subject: [PATCH 77/99] Added tests for 420 rgb -> ycbcr subsampling --- .../Formats/Jpg/RgbToYCbCrConverterTests.cs | 165 ++++++++++++++++-- 1 file changed, 152 insertions(+), 13 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs index 5f9d3f26d..fcc570c15 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs @@ 
-23,9 +23,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg private ITestOutputHelper Output { get; } [Fact] - public void TestLutConverter() + public void TestConverterLut444() { - Rgb24[] data = CreateTestData(); + int dataSize = 8 * 8; + Rgb24[] data = CreateTestData(dataSize); var target = RgbToYCbCrConverterLut.Create(); Block8x8F y = default; @@ -34,11 +35,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg target.Convert444(data.AsSpan(), ref y, ref cb, ref cr); - Verify(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(1F)); + Verify444(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(1F)); } [Fact] - public void TestVectorizedConverter() + public void TestConverterVectorized444() { if (!RgbToYCbCrConverterVectorized.IsSupported) { @@ -46,7 +47,8 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg return; } - Rgb24[] data = CreateTestData(); + int dataSize = 8 * 8; + Rgb24[] data = CreateTestData(dataSize); Block8x8F y = default; Block8x8F cb = default; @@ -54,10 +56,141 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg RgbToYCbCrConverterVectorized.Convert444(data.AsSpan(), ref y, ref cb, ref cr); - Verify(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(0.0001F)); + Verify444(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(0.0001F)); } - private static void Verify(ReadOnlySpan data, ref Block8x8F yResult, ref Block8x8F cbResult, ref Block8x8F crResult, ApproximateColorSpaceComparer comparer) + [Fact] + public void TestConverterLut420() + { + int dataSize = 16 * 16; + Span data = CreateTestData(dataSize).AsSpan(); + var target = RgbToYCbCrConverterLut.Create(); + + var yBlocks = new Block8x8F[4]; + var cb = default(Block8x8F); + var cr = default(Block8x8F); + + target.Convert420(data, ref yBlocks[0], ref yBlocks[1], ref cb, ref cr, 0); + target.Convert420(data.Slice(16 * 8), ref yBlocks[2], ref yBlocks[3], ref cb, ref cr, 1); + + Verify420(data, yBlocks, ref cb, ref cr, new 
ApproximateFloatComparer(1F)); + } + + [Fact] + public void TestConverterVectorized420() + { + if (!RgbToYCbCrConverterVectorized.IsSupported) + { + this.Output.WriteLine("No AVX and/or FMA present, skipping test!"); + return; + } + + int dataSize = 16 * 16; + Span data = CreateTestData(dataSize).AsSpan(); + + var yBlocks = new Block8x8F[4]; + var cb = default(Block8x8F); + var cr = default(Block8x8F); + + RgbToYCbCrConverterVectorized.Convert420_16x8(data, ref yBlocks[0], ref yBlocks[1], ref cb, ref cr, 0); + RgbToYCbCrConverterVectorized.Convert420_16x8(data.Slice(16 * 8), ref yBlocks[2], ref yBlocks[3], ref cb, ref cr, 1); + + Verify420(data, yBlocks, ref cb, ref cr, new ApproximateFloatComparer(1F)); + } + + + private static void Verify444( + ReadOnlySpan data, + ref Block8x8F yResult, + ref Block8x8F cbResult, + ref Block8x8F crResult, + ApproximateColorSpaceComparer comparer) + { + Block8x8F y = default; + Block8x8F cb = default; + Block8x8F cr = default; + + RgbToYCbCr(data, ref y, ref cb, ref cr); + + for (int i = 0; i < Block8x8F.Size; i++) + { + Assert.True(comparer.Equals(new YCbCr(y[i], cb[i], cr[i]), new YCbCr(yResult[i], cbResult[i], crResult[i])), $"Pos {i}, Expected {y[i]} == {yResult[i]}, {cb[i]} == {cbResult[i]}, {cr[i]} == {crResult[i]}"); + } + } + + private static void Verify420( + ReadOnlySpan data, + Block8x8F[] yResult, + ref Block8x8F cbResult, + ref Block8x8F crResult, + ApproximateFloatComparer comparer) + { + var tempBlock = default(Block8x8F); + var cbTrue = new Block8x8F[4]; + var crTrue = new Block8x8F[4]; + + Span tempData = new Rgb24[8 * 8].AsSpan(); + + // top left + Copy8x8(data, tempData); + RgbToYCbCr(tempData, ref tempBlock, ref cbTrue[0], ref crTrue[0]); + VerifyBlock(ref yResult[0], ref tempBlock, comparer); + + // top right + Copy8x8(data.Slice(8), tempData); + RgbToYCbCr(tempData, ref tempBlock, ref cbTrue[1], ref crTrue[1]); + VerifyBlock(ref yResult[1], ref tempBlock, comparer); + + // bottom left + Copy8x8(data.Slice(8 * 
16), tempData); + RgbToYCbCr(tempData, ref tempBlock, ref cbTrue[2], ref crTrue[2]); + VerifyBlock(ref yResult[2], ref tempBlock, comparer); + + // bottom right + Copy8x8(data.Slice((8 * 16) + 8), tempData); + RgbToYCbCr(tempData, ref tempBlock, ref cbTrue[3], ref crTrue[3]); + VerifyBlock(ref yResult[3], ref tempBlock, comparer); + + // verify Cb + Scale16X16To8X8(ref tempBlock, cbTrue); + VerifyBlock(ref cbResult, ref tempBlock, comparer); + + // verify Cr + Scale16X16To8X8(ref tempBlock, crTrue); + VerifyBlock(ref crResult, ref tempBlock, comparer); + + + // extracts 8x8 blocks from 16x8 memory region + static void Copy8x8(ReadOnlySpan source, Span dest) + { + for (int i = 0; i < 8; i++) + { + source.Slice(i * 16, 8).CopyTo(dest.Slice(i * 8)); + } + } + + // scales 16x16 to 8x8, used in chroma subsampling tests + static void Scale16X16To8X8(ref Block8x8F dest, ReadOnlySpan source) + { + for (int i = 0; i < 4; i++) + { + int dstOff = ((i & 2) << 4) | ((i & 1) << 2); + Block8x8F iSource = source[i]; + + for (int y = 0; y < 4; y++) + { + for (int x = 0; x < 4; x++) + { + int j = (16 * y) + (2 * x); + float sum = iSource[j] + iSource[j + 1] + iSource[j + 8] + iSource[j + 9]; + dest[(8 * y) + x + dstOff] = (sum + 2) * .25F; + } + } + } + } + } + + private static void RgbToYCbCr(ReadOnlySpan data, ref Block8x8F y, ref Block8x8F cb, ref Block8x8F cr) { for (int i = 0; i < data.Length; i++) { @@ -65,17 +198,23 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg int g = data[i].G; int b = data[i].B; - float y = (0.299F * r) + (0.587F * g) + (0.114F * b); - float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)); - float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)); + y[i] = (0.299F * r) + (0.587F * g) + (0.114F * b); + cb[i] = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)); + cr[i] = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)); + } + } - Assert.True(comparer.Equals(new YCbCr(y, cb, cr), new YCbCr(yResult[i], cbResult[i], 
crResult[i])), $"Pos {i}, Expected {y} == {yResult[i]}, {cb} == {cbResult[i]}, {cr} == {crResult[i]}"); + private static void VerifyBlock(ref Block8x8F res, ref Block8x8F target, ApproximateFloatComparer comparer) + { + for (int i = 0; i < Block8x8F.Size; i++) + { + Assert.True(comparer.Equals(res[i], target[i]), $"Pos {i}, Expected {target[i]} == {res[i]}"); } } - private static Rgb24[] CreateTestData() + private static Rgb24[] CreateTestData(int size) { - var data = new Rgb24[64]; + var data = new Rgb24[size]; var r = new Random(); var random = new byte[3]; From ad333f6598c92a2b9faf6d02637b21880a0eb0d3 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 6 Jun 2021 15:39:12 +0300 Subject: [PATCH 78/99] Simplified Lut implementation --- .../Encoder/RgbToYCbCrConverterLut.cs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index 7681063ee..b301e8320 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -229,20 +229,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder [MethodImpl(MethodImplOptions.AggressiveInlining)] private void ConvertChunk420(ref Rgb24 stride, ref float yBlock, Span chromaRgbTriplet) { - for (int k = 0; k < 8; k += 2) + for (int i = 0; i < 8; i++) { - ref float yBlockRef = ref Unsafe.Add(ref yBlock, k); + Rgb24 px0 = Unsafe.Add(ref stride, i); - Rgb24 px0 = Unsafe.Add(ref stride, k); - Rgb24 px1 = Unsafe.Add(ref stride, k + 1); + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref Unsafe.Add(ref yBlock, i)); - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); - this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); - - int idx = 3 * (k / 2); - chromaRgbTriplet[idx] += px0.R + px1.R; - chromaRgbTriplet[idx + 
1] += px0.G + px1.G; - chromaRgbTriplet[idx + 2] += px0.B + px1.B; + int idx = 3 * (i / 2); + chromaRgbTriplet[idx] += px0.R; + chromaRgbTriplet[idx + 1] += px0.G; + chromaRgbTriplet[idx + 2] += px0.B; } } From 0e053f0d6a62d621bd5d24b4685c19340815a4b5 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 7 Jun 2021 04:34:37 +0300 Subject: [PATCH 79/99] Optimized 420 converter with higher precision --- .../Encoder/RgbToYCbCrConverterLut.cs | 143 ++++++++++-------- 1 file changed, 78 insertions(+), 65 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index b301e8320..e1dcad1b6 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -92,6 +92,25 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder return tables; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private float CalculateY(byte r, byte g, byte b) + { + return (this.YRTable[r] + this.YGTable[g] + this.YBTable[b]) >> ScaleBits; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private float CalculateCb(byte r, byte g, byte b) + { + return (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private float CalculateCr(byte r, byte g, byte b) + { + return (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; + } + + /// /// Optimized method to allocates the correct y, cb, and cr values to the DCT blocks from the given r, g, b values. 
/// @@ -115,33 +134,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder crResult[i] = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void ConvertPixelInto( - int r, - int g, - int b, - ref Block8x8F yResult, - int i) - { - // float y = (0.299F * r) + (0.587F * g) + (0.114F * b); - yResult[i] = (this.YRTable[r] + this.YGTable[g] + this.YBTable[b]) >> ScaleBits; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void ConvertPixelInto(int r, int g, int b, ref float yResult) => - // float y = (0.299F * r) + (0.587F * g) + (0.114F * b); - yResult = (this.YRTable[r] + this.YGTable[g] + this.YBTable[b]) >> ScaleBits; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void ConvertPixelInto(int r, int g, int b, ref float cbResult, ref float crResult) - { - // float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)); - cbResult = (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits; - - // float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)); - crResult = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; - } - /// /// Converts Rgb24 pixels into YCbCr color space with 4:4:4 subsampling sampling of luminance and chroma. 
/// @@ -187,7 +179,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // 0-31 or 32-63 // upper or lower part - int chromaWriteOffset = row * Block8x8F.Size / 2; + int chromaWriteOffset = row * (Block8x8F.Size / 2); ref float cbBlockRef = ref Unsafe.Add(ref Unsafe.As(ref cbBlock), chromaWriteOffset); ref float crBlockRef = ref Unsafe.Add(ref Unsafe.As(ref crBlock), chromaWriteOffset); @@ -195,51 +187,72 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int i = 0; i < 8; i += 2) { - // 8 pixels by 3 integers - Span rgbTriplets = stackalloc int[24]; - - for (int j = 0; j < 2; j++) - { - int yBlockWriteOffset = (i + j) * 8; - ref Rgb24 stride = ref Unsafe.Add(ref rgbStart, (i + j) * 16); - - // left - this.ConvertChunk420(ref stride, ref Unsafe.Add(ref yBlockLeftRef, yBlockWriteOffset), rgbTriplets); - - // right - this.ConvertChunk420(ref Unsafe.Add(ref stride, 8), ref Unsafe.Add(ref yBlockRightRef, yBlockWriteOffset), rgbTriplets.Slice(12)); - } - - int writeIdx = 8 * (i / 2); - ref float cbWriteRef = ref Unsafe.Add(ref cbBlockRef, writeIdx); - ref float crWriteRef = ref Unsafe.Add(ref crBlockRef, writeIdx); - for (int j = 0; j < 8; j++) - { - int idx = j * 3; - this.ConvertPixelInto( - rgbTriplets[idx] / 4, // r - rgbTriplets[idx + 1] / 4, // g - rgbTriplets[idx + 2] / 4, // b - ref Unsafe.Add(ref cbWriteRef, j), - ref Unsafe.Add(ref crWriteRef, j)); - } + int yBlockWriteOffset = i * 8; + ref Rgb24 stride = ref Unsafe.Add(ref rgbStart, i * 16); + + int chromaOffset = 8 * (i / 2); + + // left + this.ConvertChunk420( + ref stride, + ref Unsafe.Add(ref yBlockLeftRef, yBlockWriteOffset), + ref Unsafe.Add(ref cbBlockRef, chromaOffset), + ref Unsafe.Add(ref crBlockRef, chromaOffset)); + + // right + this.ConvertChunk420( + ref Unsafe.Add(ref stride, 8), + ref Unsafe.Add(ref yBlockRightRef, yBlockWriteOffset), + ref Unsafe.Add(ref cbBlockRef, chromaOffset + 4), + ref Unsafe.Add(ref crBlockRef, chromaOffset + 4)); } } 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - private void ConvertChunk420(ref Rgb24 stride, ref float yBlock, Span chromaRgbTriplet) + private void ConvertChunk420(ref Rgb24 stride, ref float yBlock, ref float cbBlock, ref float crBlock) { - for (int i = 0; i < 8; i++) + // jpeg 8x8 blocks are processed as 16x16 blocks with 16x8 subpasses (this is done for performance reasons) + // each row is 16 pixels wide thus +16 stride reference offset + // resulting luminance (Y`) are sampled at original resolution thus +8 reference offset + for (int k = 0; k < 8; k += 2) { - Rgb24 px0 = Unsafe.Add(ref stride, i); + ref float yBlockRef = ref Unsafe.Add(ref yBlock, k); + + // top row + Rgb24 px0 = Unsafe.Add(ref stride, k); + Rgb24 px1 = Unsafe.Add(ref stride, k + 1); + this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); + this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); + + // bottom row + Rgb24 px2 = Unsafe.Add(ref stride, k + 16); + Rgb24 px3 = Unsafe.Add(ref stride, k + 17); + this.ConvertPixelInto(px2.R, px2.G, px2.B, ref Unsafe.Add(ref yBlockRef, 8)); + this.ConvertPixelInto(px3.R, px3.G, px3.B, ref Unsafe.Add(ref yBlockRef, 9)); + + Unsafe.Add(ref cbBlock, k / 2) = this.CalculateAverageCb(px0, px1, px2, px3); + Unsafe.Add(ref crBlock, k / 2) = this.CalculateAverageCr(px0, px1, px2, px3); + } + } - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref Unsafe.Add(ref yBlock, i)); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private float CalculateAverageCb(Rgb24 px0, Rgb24 px1, Rgb24 px2, Rgb24 px3) + { + return 0.25f + * (this.CalculateCb(px0.R, px0.G, px0.B) + + this.CalculateCb(px1.R, px1.G, px1.B) + + this.CalculateCb(px2.R, px2.G, px2.B) + + this.CalculateCb(px3.R, px3.G, px3.B)); + } - int idx = 3 * (i / 2); - chromaRgbTriplet[idx] += px0.R; - chromaRgbTriplet[idx + 1] += px0.G; - chromaRgbTriplet[idx + 2] += px0.B; - } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private float CalculateAverageCr(Rgb24 px0, 
Rgb24 px1, Rgb24 px2, Rgb24 px3) + { + return 0.25f + * (this.CalculateCr(px0.R, px0.G, px0.B) + + this.CalculateCr(px1.R, px1.G, px1.B) + + this.CalculateCr(px2.R, px2.G, px2.B) + + this.CalculateCr(px3.R, px3.G, px3.B)); } [MethodImpl(MethodImplOptions.AggressiveInlining)] From 2d54226caef366fe6c7c1e210d47cb70c4bf771c Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 7 Jun 2021 05:26:28 +0300 Subject: [PATCH 80/99] Both converters code cleanup --- .../Encoder/RgbToYCbCrConverterLut.cs | 51 +++++-------------- 1 file changed, 13 insertions(+), 38 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs index e1dcad1b6..15574a32a 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs @@ -95,43 +95,22 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder [MethodImpl(MethodImplOptions.AggressiveInlining)] private float CalculateY(byte r, byte g, byte b) { + // float y = (0.299F * r) + (0.587F * g) + (0.114F * b); return (this.YRTable[r] + this.YGTable[g] + this.YBTable[b]) >> ScaleBits; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private float CalculateCb(byte r, byte g, byte b) { + // float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)); return (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private float CalculateCr(byte r, byte g, byte b) { - return (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; - } - - - /// - /// Optimized method to allocates the correct y, cb, and cr values to the DCT blocks from the given r, g, b values. 
- /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void ConvertPixelInto( - int r, - int g, - int b, - ref Block8x8F yResult, - ref Block8x8F cbResult, - ref Block8x8F crResult, - int i) - { - // float y = (0.299F * r) + (0.587F * g) + (0.114F * b); - yResult[i] = (this.YRTable[r] + this.YGTable[g] + this.YBTable[b]) >> ScaleBits; - - // float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)); - cbResult[i] = (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits; - // float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)); - crResult[i] = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; + return (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits; } /// @@ -147,16 +126,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder for (int i = 0; i < Block8x8F.Size; i++) { - ref Rgb24 c = ref Unsafe.Add(ref rgbStart, i); - - this.ConvertPixelInto( - c.R, - c.G, - c.B, - ref yBlock, - ref cbBlock, - ref crBlock, - i); + Rgb24 c = Unsafe.Add(ref rgbStart, i); + + yBlock[i] = this.CalculateY(c.R, c.G, c.B); + cbBlock[i] = this.CalculateCb(c.R, c.G, c.B); + crBlock[i] = this.CalculateCr(c.R, c.G, c.B); } } @@ -221,15 +195,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // top row Rgb24 px0 = Unsafe.Add(ref stride, k); Rgb24 px1 = Unsafe.Add(ref stride, k + 1); - this.ConvertPixelInto(px0.R, px0.G, px0.B, ref yBlockRef); - this.ConvertPixelInto(px1.R, px1.G, px1.B, ref Unsafe.Add(ref yBlockRef, 1)); + yBlockRef = this.CalculateY(px0.R, px0.G, px0.B); + Unsafe.Add(ref yBlockRef, 1) = this.CalculateY(px1.R, px1.G, px1.B); // bottom row Rgb24 px2 = Unsafe.Add(ref stride, k + 16); Rgb24 px3 = Unsafe.Add(ref stride, k + 17); - this.ConvertPixelInto(px2.R, px2.G, px2.B, ref Unsafe.Add(ref yBlockRef, 8)); - this.ConvertPixelInto(px3.R, px3.G, px3.B, ref Unsafe.Add(ref yBlockRef, 9)); + Unsafe.Add(ref yBlockRef, 8) = this.CalculateY(px2.R, px2.G, px2.B); 
+ Unsafe.Add(ref yBlockRef, 9) = this.CalculateY(px3.R, px3.G, px3.B); + // chroma average for 2x2 pixel block Unsafe.Add(ref cbBlock, k / 2) = this.CalculateAverageCb(px0, px1, px2, px3); Unsafe.Add(ref crBlock, k / 2) = this.CalculateAverageCr(px0, px1, px2, px3); } From 2949145981a454ebce528cdd7dd56f70987adce1 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 7 Jun 2021 05:27:02 +0300 Subject: [PATCH 81/99] Fixed failing tests output --- tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs index fcc570c15..9ec1bf603 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs @@ -208,7 +208,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg { for (int i = 0; i < Block8x8F.Size; i++) { - Assert.True(comparer.Equals(res[i], target[i]), $"Pos {i}, Expected {target[i]} == {res[i]}"); + Assert.True(comparer.Equals(res[i], target[i]), $"Pos {i}, Expected: {target[i]}, Got: {res[i]}"); } } From 8f79eb93c2442da2e9c8331f9267997df8c79316 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 7 Jun 2021 07:22:31 +0300 Subject: [PATCH 82/99] Converters tests/code cleanup, added comments for padding property --- .../Encoder/RgbToYCbCrConverterVectorized.cs | 155 +++--------------- .../YCbCrForwardConverter420{TPixel}.cs | 4 +- .../YCbCrForwardConverter444{TPixel}.cs | 2 +- .../Formats/Jpg/RgbToYCbCrConverterTests.cs | 30 ++-- 4 files changed, 39 insertions(+), 152 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 05a1b111f..49b974404 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ 
b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -27,15 +27,33 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - public static int AvxRegisterRgbCompatibilityPadding + public static int AvxCompatibilityPadding { + // rgb byte matrices contain 8 strides by 8 pixels each, thus 64 pixels total + // Strides are stored sequentially - one big span of 64 * 3 = 192 bytes + // Each stride has exactly 3 * 8 = 24 bytes or 3 * 8 * 8 = 192 bits + // Avx registers are 256 bits so rgb span will be loaded with extra 64 bits from the next stride: + // stride 0 0 - 192 -(+64bits)-> 256 + // stride 1 192 - 384 -(+64bits)-> 448 + // stride 2 384 - 576 -(+64bits)-> 640 + // stride 3 576 - 768 -(+64bits)-> 832 + // stride 4 768 - 960 -(+64bits)-> 1024 + // stride 5 960 - 1152 -(+64bits)-> 1216 + // stride 6 1152 - 1344 -(+64bits)-> 1408 + // stride 7 1344 - 1536 -(+64bits)-> 1600 <-- READ ACCESS VIOLATION + // + // Total size of the 64 pixel rgb span: 64 * 3 * 8 = 1536 bits, avx operations require 1600 bits + // This is not permitted - we are reading foreign memory + // + // 8 byte padding to rgb byte span will solve this problem without extra code in converters get { +#if SUPPORTS_RUNTIME_INTRINSICS if (IsSupported) { return 8; } - +#endif return 0; } } @@ -89,26 +107,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder Vector256 rgb, rg, bx; Vector256 r, g, b; - // TODO: probably remove this after the draft - // rgbByteSpan contains 8 strides by 8 pixels each, thus 64 pixels total - // Strides are stored sequentially - one big span of 64 * 3 = 192 bytes - // Each stride has exactly 3 * 8 = 24 bytes or 3 * 8 * 8 = 192 bits - // Avx registers are 256 bits so rgb span will be loaded with extra 64 bits from the next stride: - // stride 0 0 - 192 -(+64bits)-> 256 - // stride 1 192 - 384 -(+64bits)-> 448 - // stride 2 384 - 576 -(+64bits)-> 640 - // stride 3 576 - 768 -(+64bits)-> 832 - // stride 4 768 - 960 
-(+64bits)-> 1024 - // stride 5 960 - 1152 -(+64bits)-> 1216 - // stride 6 1152 - 1344 -(+64bits)-> 1408 - // stride 7 1344 - 1536 -(+64bits)-> 1600 <-- READ ACCESS VIOLATION - // - // Total size of the 64 pixel rgb span: 64 * 3 * 8 = 1536 bits, avx operations require 1600 bits - // This is not permitted - we are reading foreign memory - // That's why last stride is calculated outside of the for-loop loop with special extract shuffle mask involved - // - // Extra mask & separate stride:7 calculations can be eliminated by simply providing rgb pixel span of slightly bigger size than pixels data need: - // Total pixel data size is 192 bytes, avx registers need it to be 200 bytes const int bytesPerRgbStride = 24; for (int i = 0; i < 8; i++) { @@ -135,91 +133,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder #endif } - /// - /// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices with 4:2:0 subsampling - /// - /// Total size of rgb span must be 200 bytes - public static void Convert420(ReadOnlySpan rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock, int idx) - { - Debug.Assert(IsSupported, "AVX2 is required to run this converter"); - -#if SUPPORTS_RUNTIME_INTRINSICS - var f0299 = Vector256.Create(0.299f); - var f0587 = Vector256.Create(0.587f); - var f0114 = Vector256.Create(0.114f); - var fn0168736 = Vector256.Create(-0.168736f); - var fn0331264 = Vector256.Create(-0.331264f); - var f128 = Vector256.Create(128f); - var fn0418688 = Vector256.Create(-0.418688f); - var fn0081312F = Vector256.Create(-0.081312F); - var f05 = Vector256.Create(0.5f); - var zero = Vector256.Create(0).AsByte(); - - ref Vector256 rgbByteSpan = ref Unsafe.As>(ref MemoryMarshal.GetReference(rgbSpan)); - ref Vector256 destYRef = ref yBlock.V0; - - int destOffset = (idx & 2) * 4 + (idx & 1); - - ref Vector128 destCbRef = ref Unsafe.Add(ref Unsafe.As>(ref cbBlock), destOffset); - ref Vector128 destCrRef = ref Unsafe.Add(ref Unsafe.As>(ref crBlock), 
destOffset); - - var extractToLanesMask = Unsafe.As>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes)); - var extractRgbMask = Unsafe.As>(ref MemoryMarshal.GetReference(ExtractRgb)); - Vector256 rgb, rg, bx; - Vector256 r, g, b; - - Span> rDataLanes = stackalloc Vector256[4]; - Span> gDataLanes = stackalloc Vector256[4]; - Span> bDataLanes = stackalloc Vector256[4]; - - const int bytesPerRgbStride = 24; - for (int i = 0; i < 2; i++) - { - // each 4 lanes - [0, 1, 2, 3] & [4, 5, 6, 7] - for (int j = 0; j < 4; j++) - { - rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte(); - - rgb = Avx2.Shuffle(rgb, extractRgbMask); - - rg = Avx2.UnpackLow(rgb, zero); - bx = Avx2.UnpackHigh(rgb, zero); - - r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32()); - g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32()); - b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32()); - - // (0.299F * r) + (0.587F * g) + (0.114F * b); - Unsafe.Add(ref destYRef, i * 4 + j) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r); - - rDataLanes[j] = r; - gDataLanes[j] = g; - bDataLanes[j] = b; - } - - int localDestOffset = (i & 1) * 4; - - r = Scale_8x4_4x2(rDataLanes); - g = Scale_8x4_4x2(gDataLanes); - b = Scale_8x4_4x2(bDataLanes); - - // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) - Vector256 cb = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); - Unsafe.Add(ref destCbRef, localDestOffset) = cb.GetLower(); - Unsafe.Add(ref destCbRef, localDestOffset + 2) = cb.GetUpper(); - - // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b)) - Vector256 cr = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, 
r)); - Unsafe.Add(ref destCrRef, localDestOffset) = cr.GetLower(); - Unsafe.Add(ref destCrRef, localDestOffset + 2) = cr.GetUpper(); - } -#endif - } - /// /// Converts 16x8 Rgb24 pixels matrix to 2 Y 8x8 matrices with 4:2:0 subsampling /// - public static void Convert420_16x8(ReadOnlySpan rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) + public static void Convert420(ReadOnlySpan rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row) { Debug.Assert(IsSupported, "AVX2 is required to run this converter"); @@ -337,36 +254,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SumVerticalPairs(Vector256 v0, Vector256 v1) => Avx.Add(Avx.Shuffle(v0, v1, 0b01_00_01_00), Avx.Shuffle(v0, v1, 0b11_10_11_10)); - - public static void ConvertCbCr(ref Block8x8F rBlock, ref Block8x8F gBlock, ref Block8x8F bBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock) - { - var fn0168736 = Vector256.Create(-0.168736f); - var fn0331264 = Vector256.Create(-0.331264f); - var f128 = Vector256.Create(128f); - var fn0418688 = Vector256.Create(-0.418688f); - var fn0081312F = Vector256.Create(-0.081312F); - var f05 = Vector256.Create(0.5f); - - ref Vector256 destCbRef = ref cbBlock.V0; - ref Vector256 destCrRef = ref crBlock.V0; - - ref Vector256 rRef = ref rBlock.V0; - ref Vector256 gRef = ref gBlock.V0; - ref Vector256 bRef = ref bBlock.V0; - - for (int i = 0; i < 8; i++) - { - ref Vector256 r = ref Unsafe.Add(ref rRef, i); - ref Vector256 g = ref Unsafe.Add(ref gRef, i); - ref Vector256 b = ref Unsafe.Add(ref bRef, i); - - // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) - Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); - - // 128F + ((0.5F * r) - 
(0.418688F * g) - (0.081312F * b)) - Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r)); - } - } #endif } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs index e0e7854b0..9288acc7e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs @@ -77,7 +77,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // temporal pixel buffers this.pixelSpan = new TPixel[PixelsPerSample].AsSpan(); - this.rgbSpan = MemoryMarshal.Cast(new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxRegisterRgbCompatibilityPadding].AsSpan()); + this.rgbSpan = MemoryMarshal.Cast(new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxCompatibilityPadding].AsSpan()); // frame data this.samplingAreaSize = new Size(frame.Width, frame.Height); @@ -102,7 +102,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder if (RgbToYCbCrConverterVectorized.IsSupported) { - RgbToYCbCrConverterVectorized.Convert420_16x8(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx); + RgbToYCbCrConverterVectorized.Convert420(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx); } else { diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs index 0b7438725..d611aaf9e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -71,7 +71,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder // temporal 
pixel buffers this.pixelSpan = new TPixel[PixelsPerSample].AsSpan(); - this.rgbSpan = MemoryMarshal.Cast(new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxRegisterRgbCompatibilityPadding].AsSpan()); + this.rgbSpan = MemoryMarshal.Cast(new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxCompatibilityPadding].AsSpan()); // frame data this.samplingAreaSize = new Size(frame.Width, frame.Height); diff --git a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs index 9ec1bf603..d95191ffe 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs @@ -92,8 +92,8 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg var cb = default(Block8x8F); var cr = default(Block8x8F); - RgbToYCbCrConverterVectorized.Convert420_16x8(data, ref yBlocks[0], ref yBlocks[1], ref cb, ref cr, 0); - RgbToYCbCrConverterVectorized.Convert420_16x8(data.Slice(16 * 8), ref yBlocks[2], ref yBlocks[3], ref cb, ref cr, 1); + RgbToYCbCrConverterVectorized.Convert420(data, ref yBlocks[0], ref yBlocks[1], ref cb, ref cr, 0); + RgbToYCbCrConverterVectorized.Convert420(data.Slice(16 * 8), ref yBlocks[2], ref yBlocks[3], ref cb, ref cr, 1); Verify420(data, yBlocks, ref cb, ref cr, new ApproximateFloatComparer(1F)); } @@ -125,7 +125,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg ref Block8x8F crResult, ApproximateFloatComparer comparer) { - var tempBlock = default(Block8x8F); + var trueBlock = default(Block8x8F); var cbTrue = new Block8x8F[4]; var crTrue = new Block8x8F[4]; @@ -133,31 +133,31 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg // top left Copy8x8(data, tempData); - RgbToYCbCr(tempData, ref tempBlock, ref cbTrue[0], ref crTrue[0]); - VerifyBlock(ref yResult[0], ref tempBlock, comparer); + RgbToYCbCr(tempData, ref trueBlock, ref cbTrue[0], ref crTrue[0]); + VerifyBlock(ref yResult[0], ref trueBlock, comparer); // 
top right Copy8x8(data.Slice(8), tempData); - RgbToYCbCr(tempData, ref tempBlock, ref cbTrue[1], ref crTrue[1]); - VerifyBlock(ref yResult[1], ref tempBlock, comparer); + RgbToYCbCr(tempData, ref trueBlock, ref cbTrue[1], ref crTrue[1]); + VerifyBlock(ref yResult[1], ref trueBlock, comparer); // bottom left Copy8x8(data.Slice(8 * 16), tempData); - RgbToYCbCr(tempData, ref tempBlock, ref cbTrue[2], ref crTrue[2]); - VerifyBlock(ref yResult[2], ref tempBlock, comparer); + RgbToYCbCr(tempData, ref trueBlock, ref cbTrue[2], ref crTrue[2]); + VerifyBlock(ref yResult[2], ref trueBlock, comparer); // bottom right Copy8x8(data.Slice((8 * 16) + 8), tempData); - RgbToYCbCr(tempData, ref tempBlock, ref cbTrue[3], ref crTrue[3]); - VerifyBlock(ref yResult[3], ref tempBlock, comparer); + RgbToYCbCr(tempData, ref trueBlock, ref cbTrue[3], ref crTrue[3]); + VerifyBlock(ref yResult[3], ref trueBlock, comparer); // verify Cb - Scale16X16To8X8(ref tempBlock, cbTrue); - VerifyBlock(ref cbResult, ref tempBlock, comparer); + Scale16X16To8X8(ref trueBlock, cbTrue); + VerifyBlock(ref cbResult, ref trueBlock, comparer); // verify Cr - Scale16X16To8X8(ref tempBlock, crTrue); - VerifyBlock(ref crResult, ref tempBlock, comparer); + Scale16X16To8X8(ref trueBlock, crTrue); + VerifyBlock(ref crResult, ref trueBlock, comparer); // extracts 8x8 blocks from 16x8 memory region From b1a21269a0d5bdfaf4315559b0803a8f0cd2a15a Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 7 Jun 2021 07:34:02 +0300 Subject: [PATCH 83/99] Added docs --- .../Encoder/YCbCrForwardConverter420{TPixel}.cs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs index 9288acc7e..987ca6463 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs +++ 
b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs @@ -16,13 +16,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder internal ref struct YCbCrForwardConverter420 where TPixel : unmanaged, IPixel { - // TODO: docs + /// + /// Number of pixels processed per single call + /// private const int PixelsPerSample = 16 * 8; - // TODO: docs - private static int RgbSpanByteSize = PixelsPerSample * 3; + /// + /// Total byte size of processed pixels converted from TPixel to + /// + private const int RgbSpanByteSize = PixelsPerSample * 3; - // TODO: docs + /// + /// of sampling area from given frame pixel buffer + /// private static readonly Size SampleSize = new Size(16, 8); /// From 2edb1a8bb96627a57f23588ab564dd04432c4c53 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 7 Jun 2021 07:39:44 +0300 Subject: [PATCH 84/99] Removed obsolete code --- .../YCbCrForwardConverter444{TPixel}.cs | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs index d611aaf9e..91e56cab2 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -88,27 +88,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } } - public static YCbCrForwardConverter444 Create() - { - var result = default(YCbCrForwardConverter444); - - // creating rgb pixel bufferr - // TODO: this is subject to discuss - // converter.Convert comments for +8 padding - result.rgbSpan = MemoryMarshal.Cast(new byte[RgbSpanByteSize + 8].AsSpan()); - - // TODO: this is subject to discuss - result.pixelSpan = new TPixel[PixelsPerSample].AsSpan(); - - // Avoid creating lookup tables, when vectorized converter is supported - if 
(!RgbToYCbCrConverterVectorized.IsSupported) - { - result.colorTables = RgbToYCbCrConverterLut.Create(); - } - - return result; - } - /// /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) /// From 0aecbd023d0003fb8fb7baf157ae3bc781a0e4f7 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 7 Jun 2021 07:41:15 +0300 Subject: [PATCH 85/99] Removed unused usings --- .../Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs | 2 -- .../Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs | 1 - 2 files changed, 3 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs index fdeecc9d8..ca352397b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs @@ -1,9 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. -using System; using System.IO; -using System.Numerics; using System.Runtime.CompilerServices; using System.Threading; using SixLabors.ImageSharp.Memory; diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs index 91e56cab2..1ef8246ff 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -2,7 +2,6 @@ // Licensed under the Apache License, Version 2.0. 
using System; -using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.PixelFormats; From a4222fd91cfb1b9b5597455860417dff68d76526 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 7 Jun 2021 08:11:43 +0300 Subject: [PATCH 86/99] Added DCT tests --- .../Jpeg/Components/FastFloatingPointDCT.cs | 2 +- .../ImageSharp.Tests/Formats/Jpg/DCTTests.cs | 207 +++++++++++++----- 2 files changed, 159 insertions(+), 50 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index afcf4158b..ad2e290f6 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -203,7 +203,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Source /// Destination - private static void FDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d) + public static void FDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d) { #if SUPPORTS_RUNTIME_INTRINSICS Debug.Assert(Avx.IsSupported, "AVX is required to execute this method"); diff --git a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs index 75ad5427c..99dce57c7 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs @@ -2,7 +2,7 @@ // Licensed under the Apache License, Version 2.0. 
using System; - +using System.Runtime.Intrinsics.X86; using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; @@ -22,94 +22,160 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg { } - [Fact] - public void IDCT2D8x4_LeftPart() + // Reference tests + [Theory] + [InlineData(1)] + [InlineData(2)] + [InlineData(3)] + public void LLM_TransformIDCT_CompareToNonOptimized(int seed) { - float[] sourceArray = Create8x8FloatData(); - var expectedDestArray = new float[64]; + float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed); - ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(sourceArray, expectedDestArray); + var source = Block8x8F.Load(sourceArray); - var source = default(Block8x8F); - source.LoadFrom(sourceArray); + Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref source); - var dest = default(Block8x8F); + var temp = default(Block8x8F); + var actual = default(Block8x8F); + FastFloatingPointDCT.TransformIDCT(ref source, ref actual, ref temp); - FastFloatingPointDCT.IDCT8x4_LeftPart(ref source, ref dest); + this.CompareBlocks(expected, actual, 1f); + } - var actualDestArray = new float[64]; - dest.ScaledCopyTo(actualDestArray); + [Theory] + [InlineData(1)] + [InlineData(2)] + [InlineData(3)] + public void LLM_TransformIDCT_CompareToAccurate(int seed) + { + float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed); + + var source = Block8x8F.Load(sourceArray); + + Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref source); - this.Print8x8Data(expectedDestArray); - this.Output.WriteLine("**************"); - this.Print8x8Data(actualDestArray); + var temp = default(Block8x8F); + var actual = default(Block8x8F); + FastFloatingPointDCT.TransformIDCT(ref source, ref actual, ref temp); - Assert.Equal(expectedDestArray, actualDestArray); + this.CompareBlocks(expected, actual, 1f); } - [Fact] - public void IDCT2D8x4_RightPart() 
+ + // Inverse transform + [Theory] + [InlineData(1)] + [InlineData(2)] + public void IDCT8x4_LeftPart(int seed) { - float[] sourceArray = Create8x8FloatData(); - var expectedDestArray = new float[64]; + Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); + var srcBlock = default(Block8x8F); + srcBlock.LoadFrom(src); + + var destBlock = default(Block8x8F); + + var expectedDest = new float[64]; + + // reference + ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest); - ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(sourceArray.AsSpan(4), expectedDestArray.AsSpan(4)); + // testee + FastFloatingPointDCT.IDCT8x4_LeftPart(ref srcBlock, ref destBlock); + + var actualDest = new float[64]; + destBlock.ScaledCopyTo(actualDest); + + Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); + } + + [Theory] + [InlineData(1)] + [InlineData(2)] + public void IDCT8x4_RightPart(int seed) + { + Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); + var srcBlock = default(Block8x8F); + srcBlock.LoadFrom(src); - var source = default(Block8x8F); - source.LoadFrom(sourceArray); + var destBlock = default(Block8x8F); - var dest = default(Block8x8F); + var expectedDest = new float[64]; - FastFloatingPointDCT.IDCT8x4_RightPart(ref source, ref dest); + // reference + ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4)); - var actualDestArray = new float[64]; - dest.ScaledCopyTo(actualDestArray); + // testee + FastFloatingPointDCT.IDCT8x4_RightPart(ref srcBlock, ref destBlock); - this.Print8x8Data(expectedDestArray); - this.Output.WriteLine("**************"); - this.Print8x8Data(actualDestArray); + var actualDest = new float[64]; + destBlock.ScaledCopyTo(actualDest); - Assert.Equal(expectedDestArray, actualDestArray); + Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); } [Theory] [InlineData(1)] [InlineData(2)] - [InlineData(3)] - public void 
LLM_TransformIDCT_CompareToNonOptimized(int seed) + public void IDCT8x8_Avx(int seed) { - float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed); + if (!Avx.IsSupported) + { + this.Output.WriteLine("No AVX present, skipping test!"); + return; + } - var source = Block8x8F.Load(sourceArray); + Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); + var srcBlock = default(Block8x8F); + srcBlock.LoadFrom(src); - Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref source); + var destBlock = default(Block8x8F); - var temp = default(Block8x8F); - var actual = default(Block8x8F); - FastFloatingPointDCT.TransformIDCT(ref source, ref actual, ref temp); + var expectedDest = new float[64]; - this.CompareBlocks(expected, actual, 1f); + // reference, left part + ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest); + + // reference, right part + ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4)); + + // testee, whole 8x8 + FastFloatingPointDCT.IDCT8x8_Avx(ref srcBlock, ref destBlock); + + var actualDest = new float[64]; + destBlock.ScaledCopyTo(actualDest); + + Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); } [Theory] [InlineData(1)] [InlineData(2)] - [InlineData(3)] - public void LLM_TransformIDCT_CompareToAccurate(int seed) + public void TransformIDCT(int seed) { - float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed); + Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); + var srcBlock = default(Block8x8F); + srcBlock.LoadFrom(src); - var source = Block8x8F.Load(sourceArray); + var destBlock = default(Block8x8F); - Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref source); + var expectedDest = new float[64]; + var temp1 = new float[64]; + var temp2 = default(Block8x8F); - var temp = default(Block8x8F); - var actual = default(Block8x8F); - 
FastFloatingPointDCT.TransformIDCT(ref source, ref actual, ref temp); + // reference + ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp1); - this.CompareBlocks(expected, actual, 1f); + // testee + FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref destBlock, ref temp2); + + var actualDest = new float[64]; + destBlock.ScaledCopyTo(actualDest); + + Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); } + + // Forward transform [Theory] [InlineData(1)] [InlineData(2)] @@ -123,7 +189,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg var expectedDest = new float[64]; + // reference ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D8x4_32f(src, expectedDest); + + // testee FastFloatingPointDCT.FDCT8x4_LeftPart(ref srcBlock, ref destBlock); var actualDest = new float[64]; @@ -145,7 +214,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg var expectedDest = new float[64]; + // reference ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4)); + + // testee FastFloatingPointDCT.FDCT8x4_RightPart(ref srcBlock, ref destBlock); var actualDest = new float[64]; @@ -154,6 +226,40 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); } + [Theory] + [InlineData(1)] + [InlineData(2)] + public void FDCT8x8_Avx(int seed) + { + if (!Avx.IsSupported) + { + this.Output.WriteLine("No AVX present, skipping test!"); + return; + } + + Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); + var srcBlock = default(Block8x8F); + srcBlock.LoadFrom(src); + + var destBlock = default(Block8x8F); + + var expectedDest = new float[64]; + + // reference, left part + ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D8x4_32f(src, expectedDest); + + // reference, right part + ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4)); + + // testee, whole 8x8 + 
FastFloatingPointDCT.FDCT8x8_Avx(ref srcBlock, ref destBlock); + + var actualDest = new float[64]; + destBlock.ScaledCopyTo(actualDest); + + Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); + } + [Theory] [InlineData(1)] [InlineData(2)] @@ -169,7 +275,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg var temp1 = new float[64]; var temp2 = default(Block8x8F); + // reference ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true); + + // testee FastFloatingPointDCT.TransformFDCT(ref srcBlock, ref destBlock, ref temp2, false); var actualDest = new float[64]; From 8a61048a5c73ee5cc025fcefb8860168cad97c94 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 7 Jun 2021 08:37:13 +0300 Subject: [PATCH 87/99] Fixed DCT tests --- tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs index 99dce57c7..fd5e5b005 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs @@ -2,7 +2,9 @@ // Licensed under the Apache License, Version 2.0. 
using System; +#if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics.X86; +#endif using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; @@ -118,7 +120,13 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [InlineData(2)] public void IDCT8x8_Avx(int seed) { - if (!Avx.IsSupported) +#if SUPPORTS_RUNTIME_INTRINSICS + var skip = !Avx.IsSupported; +#else + var skip = true; +#endif + + if (skip) { this.Output.WriteLine("No AVX present, skipping test!"); return; @@ -231,7 +239,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [InlineData(2)] public void FDCT8x8_Avx(int seed) { - if (!Avx.IsSupported) +#if SUPPORTS_RUNTIME_INTRINSICS + var skip = !Avx.IsSupported; +#else + var skip = true; +#endif + if (skip) { this.Output.WriteLine("No AVX present, skipping test!"); return; From b9b853b5239cbe5ada16370b624cad7794a2067e Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 7 Jun 2021 16:42:26 +0300 Subject: [PATCH 88/99] Added docs & stylecop fixes --- .../YCbCrForwardConverter420{TPixel}.cs | 10 ++++---- .../YCbCrForwardConverter444{TPixel}.cs | 23 +++++++++++++------ .../Jpeg/Components/FastFloatingPointDCT.cs | 2 -- .../Formats/Jpeg/JpegEncoderCore.cs | 5 ---- .../ImageSharp.Tests/Formats/Jpg/DCTTests.cs | 2 -- 5 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs index 987ca6463..a4abd532b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs @@ -2,7 +2,6 @@ // Licensed under the Apache License, Version 2.0. 
using System; -using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.PixelFormats; @@ -66,13 +65,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private Span rgbSpan; - // TODO: docs + /// + /// Sampled pixel buffer size + /// private Size samplingAreaSize; - // TODO: docs + /// + /// for internal operations + /// private Configuration config; - public YCbCrForwardConverter420(ImageFrame frame) { // matrices would be filled during convert calls diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs index 1ef8246ff..ef589272b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs @@ -15,16 +15,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder internal ref struct YCbCrForwardConverter444 where TPixel : unmanaged, IPixel { - // TODO: docs + /// + /// Number of pixels processed per single call + /// private const int PixelsPerSample = 8 * 8; - // TODO: docs + /// + /// Total byte size of processed pixels converted from TPixel to + /// private const int RgbSpanByteSize = PixelsPerSample * 3; - // TODO: docs + /// + /// of sampling area from given frame pixel buffer + /// private static readonly Size SampleSize = new Size(8, 8); - /// /// The Y component /// @@ -55,11 +60,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder /// private Span rgbSpan; - // TODO: docs + /// + /// Sampled pixel buffer size + /// private Size samplingAreaSize; - // TODO: docs - private readonly Configuration config; + /// + /// for internal operations + /// + private Configuration config; public YCbCrForwardConverter444(ImageFrame frame) { diff --git 
a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index ad2e290f6..f31d07efc 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -273,7 +273,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Apply floating point FDCT from src into dest /// - /// /// Source /// Destination /// Temporary block provided by the caller for optimization @@ -467,7 +466,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Vector256 mb1 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my3, C_V_3_0727), mz2); Vector256 mb0 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my1, C_V_1_5013), mz3); - Vector256 my2 = s.V2; Vector256 my6 = s.V6; mz4 = Avx.Multiply(Avx.Add(my2, my6), C_V_0_5411); diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index c68c0ffb0..6020e6196 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -5,14 +5,11 @@ using System; using System.Buffers.Binary; using System.IO; using System.Linq; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using System.Threading; using SixLabors.ImageSharp.Common.Helpers; using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder; using SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder; -using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Metadata; using SixLabors.ImageSharp.Metadata.Profiles.Exif; using SixLabors.ImageSharp.Metadata.Profiles.Icc; @@ -69,7 +66,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg 99, 99, 99, 99, 99, 99, 99, 99, }; - /// /// A scratch buffer to reduce allocations. 
/// @@ -625,7 +621,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg private void WriteStartOfScan(Image image, int componentCount, CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) Span componentId = stackalloc byte[] { 0x01, diff --git a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs index fd5e5b005..606a5678b 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs @@ -63,7 +63,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.CompareBlocks(expected, actual, 1f); } - // Inverse transform [Theory] [InlineData(1)] @@ -182,7 +181,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); } - // Forward transform [Theory] [InlineData(1)] From 8d321a5dc205252b540a30ccbed49cabe14c6320 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 7 Jun 2021 17:49:10 +0300 Subject: [PATCH 89/99] Added DCT tests paths for nosimd/avx/avx+fma --- .../ImageSharp.Tests/Formats/Jpg/DCTTests.cs | 90 +++++++++++++------ 1 file changed, 61 insertions(+), 29 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs index 606a5678b..d49a6498c 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs @@ -7,7 +7,7 @@ using System.Runtime.Intrinsics.X86; #endif using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; - +using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; @@ -159,26 +159,42 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [InlineData(2)] public void TransformIDCT(int seed) { - Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); - var 
srcBlock = default(Block8x8F); - srcBlock.LoadFrom(src); + static void RunTest(string serialized) + { + int seed = FeatureTestRunner.Deserialize(serialized); - var destBlock = default(Block8x8F); + Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); + var srcBlock = default(Block8x8F); + srcBlock.LoadFrom(src); - var expectedDest = new float[64]; - var temp1 = new float[64]; - var temp2 = default(Block8x8F); + var destBlock = default(Block8x8F); - // reference - ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp1); + var expectedDest = new float[64]; + var temp1 = new float[64]; + var temp2 = default(Block8x8F); - // testee - FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref destBlock, ref temp2); + // reference + ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp1); - var actualDest = new float[64]; - destBlock.ScaledCopyTo(actualDest); + // testee + FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref destBlock, ref temp2); - Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); + var actualDest = new float[64]; + destBlock.ScaledCopyTo(actualDest); + + Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); + } + + // 3 paths: + // 1. AllowAll - call avx/fma implementation + // 2. DisableFMA - call avx implementation without fma acceleration + // 3. 
DisableAvx - call fallback code of Vector4 implementation + // + // DisableSSE isn't needed because fallback Vector4 code will compile to either sse or fallback code with same result + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + seed, + HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX); } // Forward transform @@ -276,26 +292,42 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [InlineData(2)] public void TransformFDCT(int seed) { - Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); - var srcBlock = default(Block8x8F); - srcBlock.LoadFrom(src); + static void RunTest(string serialized) + { + int seed = FeatureTestRunner.Deserialize(serialized); - var destBlock = default(Block8x8F); + Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); + var srcBlock = default(Block8x8F); + srcBlock.LoadFrom(src); - var expectedDest = new float[64]; - var temp1 = new float[64]; - var temp2 = default(Block8x8F); + var destBlock = default(Block8x8F); - // reference - ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true); + var expectedDest = new float[64]; + var temp1 = new float[64]; + var temp2 = default(Block8x8F); - // testee - FastFloatingPointDCT.TransformFDCT(ref srcBlock, ref destBlock, ref temp2, false); + // reference + ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true); - var actualDest = new float[64]; - destBlock.ScaledCopyTo(actualDest); + // testee + FastFloatingPointDCT.TransformFDCT(ref srcBlock, ref destBlock, ref temp2, false); - Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); + var actualDest = new float[64]; + destBlock.ScaledCopyTo(actualDest); + + Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); + } + + // 3 paths: + // 1. AllowAll - call avx/fma implementation + // 2. DisableFMA - call avx implementation without fma acceleration + // 3. 
DisableAvx - call fallback code of Vector4 implementation + // + // DisableSSE isn't needed because fallback Vector4 code will compile to either sse or fallback code with same result + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + seed, + HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX); } } } From 0e07a8ed6187721125b6a490b4f48a3bb6081a1b Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 7 Jun 2021 18:40:12 +0300 Subject: [PATCH 90/99] Removed obsolete code --- .../Formats/Jpeg/Components/Block8x8F.cs | 75 ------------------- .../Block8x8F_Scale16X16To8X8.cs | 38 ---------- 2 files changed, 113 deletions(-) delete mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Scale16X16To8X8.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 0acc6408e..8ca7b0c80 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -477,81 +477,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components DivideRoundAll(ref dest, ref qt); } - /// - /// Scales the 16x16 region represented by the 4 source blocks to the 8x8 DST block. - /// - /// The destination block. - /// The source block. 
- public static unsafe void Scale16X16To8X8(ref Block8x8F destination, ReadOnlySpan source) - { -#if SUPPORTS_RUNTIME_INTRINSICS - if (Avx2.IsSupported) - { - Scale16X16To8X8Vectorized(ref destination, source); - return; - } -#endif - - Scale16X16To8X8Scalar(ref destination, source); - } - - private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnlySpan source) - { -#if SUPPORTS_RUNTIME_INTRINSICS - Debug.Assert(Avx2.IsSupported, "AVX2 is required to execute this method"); - - var f2 = Vector256.Create(2f); - var f025 = Vector256.Create(0.25f); - Vector256 switchInnerDoubleWords = Unsafe.As>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32)); - ref Vector256 destRef = ref destination.V0; - - for (int i = 0; i < 2; i++) - { - ref Vector256 in1 = ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 2 * i).V0; - ref Vector256 in2 = ref Unsafe.Add(ref MemoryMarshal.GetReference(source), (2 * i) + 1).V0; - - for (int j = 0; j < 8; j += 2) - { - Vector256 a = Unsafe.Add(ref in1, j); - Vector256 b = Unsafe.Add(ref in1, j + 1); - Vector256 c = Unsafe.Add(ref in2, j); - Vector256 d = Unsafe.Add(ref in2, j + 1); - - Vector256 calc1 = Avx.Shuffle(a, c, 0b10_00_10_00); - Vector256 calc2 = Avx.Shuffle(a, c, 0b11_01_11_01); - Vector256 calc3 = Avx.Shuffle(b, d, 0b10_00_10_00); - Vector256 calc4 = Avx.Shuffle(b, d, 0b11_01_11_01); - - Vector256 sum = Avx.Add(Avx.Add(calc1, calc2), Avx.Add(calc3, calc4)); - Vector256 add = Avx.Add(sum, f2); - Vector256 res = Avx.Multiply(add, f025); - - destRef = Avx2.PermuteVar8x32(res, switchInnerDoubleWords); - destRef = ref Unsafe.Add(ref destRef, 1); - } - } -#endif - } - - private static unsafe void Scale16X16To8X8Scalar(ref Block8x8F destination, ReadOnlySpan source) - { - for (int i = 0; i < 4; i++) - { - int dstOff = ((i & 2) << 4) | ((i & 1) << 2); - Block8x8F iSource = source[i]; - - for (int y = 0; y < 4; y++) - { - for (int x = 0; x < 4; x++) - { - int j = (16 * y) + (2 * 
x); - float sum = iSource[j] + iSource[j + 1] + iSource[j + 8] + iSource[j + 9]; - destination[(8 * y) + x + dstOff] = (sum + 2) * .25F; - } - } - } - } - [MethodImpl(InliningOptions.ShortMethod)] private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) { diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Scale16X16To8X8.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Scale16X16To8X8.cs deleted file mode 100644 index ebd3e4013..000000000 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Scale16X16To8X8.cs +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using BenchmarkDotNet.Attributes; -using SixLabors.ImageSharp.Formats.Jpeg.Components; - -namespace SixLabors.ImageSharp.Benchmarks.Format.Jpeg.Components -{ - [Config(typeof(Config.HwIntrinsics_SSE_AVX))] - public class Block8x8F_Scale16X16To8X8 - { - private Block8x8F source; - private readonly Block8x8F[] target = new Block8x8F[4]; - - [GlobalSetup] - public void Setup() - { - var random = new Random(); - - float[] f = new float[8 * 8]; - for (int i = 0; i < f.Length; i++) - { - f[i] = (float)random.NextDouble(); - } - - for (int i = 0; i < 4; i++) - { - this.target[i] = Block8x8F.Load(f); - } - - this.source = Block8x8F.Load(f); - } - - [Benchmark] - public void Scale16X16To8X8() => Block8x8F.Scale16X16To8X8(ref this.source, this.target); - } -} From 0013c54460e1b775f3daa530305092eacc9623c5 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Wed, 9 Jun 2021 15:49:04 +0300 Subject: [PATCH 91/99] Optimized vector rgb pixel matrix scaling --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 18 ++++++++++++ .../Encoder/RgbToYCbCrConverterVectorized.cs | 28 ++----------------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs 
index 00c0d89f0..caeb694a9 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -577,6 +577,24 @@ namespace SixLabors.ImageSharp } } + /// + /// Scales 8x8 matrix to 4x2 using 2x2 average + /// + /// Input matrix consisting of 4 256bit vectors, first row: (v[0], v[2]), second row: (v[1], v[3]) + /// 256bit vector containing upper and lower scaled parts of the input matrix + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Scale16x2_8x1(ReadOnlySpan> v) + { + DebugGuard.IsTrue(v.Length == 4, "Input span must consist of 4 elements"); + + var f025 = Vector256.Create(0.25f); + + Vector256 left = Avx.Add(v[0], v[2]); + Vector256 right = Avx.Add(v[1], v[3]); + Vector256 avg2x2 = Avx.Multiply(Avx.HorizontalAdd(left, right), f025); + + return Avx2.Permute4x64(avg2x2.AsDouble(), 0b11_01_10_00).AsSingle(); + } /// /// as many elements as possible, slicing them down (keeping the remainder). 
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 49b974404..56da8acc7 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -221,9 +221,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder bDataLanes[j] = b; } - r = Scale_8x4_4x2(rDataLanes); - g = Scale_8x4_4x2(gDataLanes); - b = Scale_8x4_4x2(bDataLanes); + r = SimdUtils.HwIntrinsics.Scale16x2_8x1(rDataLanes); + g = SimdUtils.HwIntrinsics.Scale16x2_8x1(gDataLanes); + b = SimdUtils.HwIntrinsics.Scale16x2_8x1(bDataLanes); // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); @@ -233,27 +233,5 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } #endif } - -#if SUPPORTS_RUNTIME_INTRINSICS - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 Scale_8x4_4x2(Span> v) - { - Vector256 switchInnerDoubleWords = Unsafe.As>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32)); - var f025 = Vector256.Create(0.25f); - - Vector256 topPairSum = SumHorizontalPairs(v[0], v[2]); - Vector256 botPairSum = SumHorizontalPairs(v[1], v[3]); - - return Avx2.PermuteVar8x32(Avx.Multiply(SumVerticalPairs(topPairSum, botPairSum), f025), switchInnerDoubleWords); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 SumHorizontalPairs(Vector256 v0, Vector256 v1) - => Avx.Add(Avx.Shuffle(v0, v1, 0b10_00_10_00), Avx.Shuffle(v0, v1, 0b11_01_11_01)); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 SumVerticalPairs(Vector256 v0, Vector256 v1) - => Avx.Add(Avx.Shuffle(v0, 
v1, 0b01_00_01_00), Avx.Shuffle(v0, v1, 0b11_10_11_10)); -#endif } } From 35daf2110f2196ce47853e167d4eb1df2e265b26 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 10 Jun 2021 03:59:26 +0300 Subject: [PATCH 92/99] Added tests for vector rgb pixel matrix scaling --- .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 1f680aa6c..69f1b20fb 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -5,6 +5,8 @@ using System; using System.Linq; using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; #if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics.X86; #endif @@ -358,6 +360,44 @@ namespace SixLabors.ImageSharp.Tests.Common SimdUtils.PackFromRgbPlanes(Configuration.Default, r, g, b, actual)); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Theory] + [InlineData(1)] + [InlineData(2)] + [InlineData(3)] + public void Scale16x2_8x1(int seed) + { + if (!Avx.IsSupported) + { + return; + } + + Span data = new Random(seed).GenerateRandomFloatArray(Vector256.Count * 4, -1000, 1000); + + // Act: + Vector256 resultVector = SimdUtils.HwIntrinsics.Scale16x2_8x1(MemoryMarshal.Cast>(data)); + ref float result = ref Unsafe.As, float>(ref resultVector); + + // Assert: + // Comparison epsilon is tricky but 10^(-4) is good enough (?) 
+ var comparer = new ApproximateFloatComparer(0.0001f); + for (int i = 0; i < Vector256.Count; i++) + { + float actual = Unsafe.Add(ref result, i); + float expected = CalculateAverage16x2_8x1(data, i); + + Assert.True(comparer.Equals(actual, expected), $"Pos {i}, Expected: {expected}, Actual: {actual}"); + } + + static float CalculateAverage16x2_8x1(Span data, int index) + { + int upIdx = index * 2; + int lowIdx = (index + 8) * 2; + return 0.25f * (data[upIdx] + data[upIdx + 1] + data[lowIdx] + data[lowIdx + 1]); + } + } +#endif + #if SUPPORTS_RUNTIME_INTRINSICS [Fact] public void PackFromRgbPlanesAvx2Reduce_Rgb24() From 121d1fa917da89c47a31a703862dfae77bed5f7a Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 10 Jun 2021 04:13:18 +0300 Subject: [PATCH 93/99] Fixed build error due to invalid using --- tests/ImageSharp.Tests/Common/SimdUtilsTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 69f1b20fb..40f0e0c7b 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -6,8 +6,8 @@ using System.Linq; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; #if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; #endif using SixLabors.ImageSharp.PixelFormats; From 20a0d846768bb7662fc19cb6ae88648b5b3a0810 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 10 Jun 2021 05:09:53 +0300 Subject: [PATCH 94/99] Moved jpeg matrix scaler to jpeg converter --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 19 ------------- .../Encoder/RgbToYCbCrConverterVectorized.cs | 27 ++++++++++++++++--- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs 
b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index caeb694a9..b530a37e7 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -577,25 +577,6 @@ namespace SixLabors.ImageSharp } } - /// - /// Scales 8x8 matrix to 4x2 using 2x2 average - /// - /// Input matrix consisting of 4 256bit vectors, first row: (v[0], v[2]), second row: (v[1], v[3]) - /// 256bit vector containing upper and lower scaled parts of the input matrix - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 Scale16x2_8x1(ReadOnlySpan> v) - { - DebugGuard.IsTrue(v.Length == 4, "Input span must consist of 4 elements"); - - var f025 = Vector256.Create(0.25f); - - Vector256 left = Avx.Add(v[0], v[2]); - Vector256 right = Avx.Add(v[1], v[3]); - Vector256 avg2x2 = Avx.Multiply(Avx.HorizontalAdd(left, right), f025); - - return Avx2.Permute4x64(avg2x2.AsDouble(), 0b11_01_10_00).AsSingle(); - } - /// /// as many elements as possible, slicing them down (keeping the remainder). 
/// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 56da8acc7..1b7df596c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -221,9 +221,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder bDataLanes[j] = b; } - r = SimdUtils.HwIntrinsics.Scale16x2_8x1(rDataLanes); - g = SimdUtils.HwIntrinsics.Scale16x2_8x1(gDataLanes); - b = SimdUtils.HwIntrinsics.Scale16x2_8x1(bDataLanes); + r = Scale16x2_8x1(rDataLanes); + g = Scale16x2_8x1(gDataLanes); + b = Scale16x2_8x1(bDataLanes); // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b)) Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r)); @@ -233,5 +233,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder } #endif } + +#if SUPPORTS_RUNTIME_INTRINSICS + /// + /// Scales 16x2 matrix to 8x1 using 2x2 average + /// + /// Input matrix consisting of 4 256bit vectors + /// 256bit vector containing upper and lower scaled parts of the input matrix + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector256 Scale16x2_8x1(ReadOnlySpan> v) + { + DebugGuard.IsTrue(v.Length == 4, "Input span must consist of 4 elements"); + + var f025 = Vector256.Create(0.25f); + + Vector256 left = Avx.Add(v[0], v[2]); + Vector256 right = Avx.Add(v[1], v[3]); + Vector256 avg2x2 = Avx.Multiply(Avx.HorizontalAdd(left, right), f025); + + return Avx2.Permute4x64(avg2x2.AsDouble(), 0b11_01_10_00).AsSingle(); + } } +#endif } From 6d4e2ee23c4d2fb42d5039044b998c476f2a8c52 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 10 Jun 2021 05:12:40 +0300 Subject: [PATCH 95/99] Moved jpeg converter scaler tests to to jpeg converter tests 
--- .../Encoder/RgbToYCbCrConverterVectorized.cs | 2 +- .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 40 ----------------- .../Formats/Jpg/RgbToYCbCrConverterTests.cs | 43 +++++++++++++++++++ 3 files changed, 44 insertions(+), 41 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 1b7df596c..0fcffbc7e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -234,7 +234,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder #endif } -#if SUPPORTS_RUNTIME_INTRINSICS +#if SUPPORTS_RUNTIME_INTRINSICS /// /// Scales 16x2 matrix to 8x1 using 2x2 average /// diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 40f0e0c7b..1f680aa6c 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -5,9 +5,7 @@ using System; using System.Linq; using System.Numerics; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; #if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; #endif using SixLabors.ImageSharp.PixelFormats; @@ -360,44 +358,6 @@ namespace SixLabors.ImageSharp.Tests.Common SimdUtils.PackFromRgbPlanes(Configuration.Default, r, g, b, actual)); } -#if SUPPORTS_RUNTIME_INTRINSICS - [Theory] - [InlineData(1)] - [InlineData(2)] - [InlineData(3)] - public void Scale16x2_8x1(int seed) - { - if (!Avx.IsSupported) - { - return; - } - - Span data = new Random(seed).GenerateRandomFloatArray(Vector256.Count * 4, -1000, 1000); - - // Act: - Vector256 resultVector = SimdUtils.HwIntrinsics.Scale16x2_8x1(MemoryMarshal.Cast>(data)); - ref float result = ref Unsafe.As, float>(ref resultVector); - - // Assert: - // 
Comparison epsilon is tricky but 10^(-4) is good enough (?) - var comparer = new ApproximateFloatComparer(0.0001f); - for (int i = 0; i < Vector256.Count; i++) - { - float actual = Unsafe.Add(ref result, i); - float expected = CalculateAverage16x2_8x1(data, i); - - Assert.True(comparer.Equals(actual, expected), $"Pos {i}, Expected: {expected}, Actual: {actual}"); - } - - static float CalculateAverage16x2_8x1(Span data, int index) - { - int upIdx = index * 2; - int lowIdx = (index + 8) * 2; - return 0.25f * (data[upIdx] + data[upIdx + 1] + data[lowIdx] + data[lowIdx + 1]); - } - } -#endif - #if SUPPORTS_RUNTIME_INTRINSICS [Fact] public void PackFromRgbPlanesAvx2Reduce_Rgb24() diff --git a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs index d95191ffe..0d5b55038 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs @@ -2,6 +2,12 @@ // Licensed under the Apache License, Version 2.0. 
using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif using SixLabors.ImageSharp.ColorSpaces; using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder; @@ -98,6 +104,43 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Verify420(data, yBlocks, ref cb, ref cr, new ApproximateFloatComparer(1F)); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Theory] + [InlineData(1)] + [InlineData(2)] + [InlineData(3)] + public void Scale16x2_8x1(int seed) + { + if (!Avx2.IsSupported) + { + return; + } + + Span data = new Random(seed).GenerateRandomFloatArray(Vector256.Count * 4, -1000, 1000); + + // Act: + Vector256 resultVector = RgbToYCbCrConverterVectorized.Scale16x2_8x1(MemoryMarshal.Cast>(data)); + ref float result = ref Unsafe.As, float>(ref resultVector); + + // Assert: + // Comparison epsilon is tricky but 10^(-4) is good enough (?) 
+ var comparer = new ApproximateFloatComparer(0.0001f); + for (int i = 0; i < Vector256.Count; i++) + { + float actual = Unsafe.Add(ref result, i); + float expected = CalculateAverage16x2_8x1(data, i); + + Assert.True(comparer.Equals(actual, expected), $"Pos {i}, Expected: {expected}, Actual: {actual}"); + } + + static float CalculateAverage16x2_8x1(Span data, int index) + { + int upIdx = index * 2; + int lowIdx = (index + 8) * 2; + return 0.25f * (data[upIdx] + data[upIdx + 1] + data[lowIdx] + data[lowIdx + 1]); + } + } +#endif private static void Verify444( ReadOnlySpan data, From ce1d9922004c45724b0c48ec1609688bd6dde33d Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 10 Jun 2021 05:17:28 +0300 Subject: [PATCH 96/99] Fixed invalid curly braces, added debug Avx2 check --- .../Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs index 0fcffbc7e..926e7d5a4 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs @@ -243,6 +243,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector256 Scale16x2_8x1(ReadOnlySpan> v) { + Debug.Assert(Avx2.IsSupported, "AVX2 is required to run this converter"); DebugGuard.IsTrue(v.Length == 4, "Input span must consist of 4 elements"); var f025 = Vector256.Create(0.25f); @@ -253,6 +254,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder return Avx2.Permute4x64(avg2x2.AsDouble(), 0b11_01_10_00).AsSingle(); } - } #endif + } } From 8bbcd6519762a93fcd094e797b591ac4c11f5843 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 10 Jun 2021 17:26:18 +0300 Subject: [PATCH 97/99] 
Improved benchmark for jpeg encoder --- .../Codecs/Jpeg/EncodeJpeg.cs | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs index e807c416b..5e0a5aff3 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs @@ -13,14 +13,11 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { public class EncodeJpeg { - [Params(50, 75, 95, 100)] + [Params(75, 90, 100)] public int Quality; private const string TestImage = TestImages.Jpeg.BenchmarkSuite.Jpeg420Exif_MidSizeYCbCr; - // GDI+ uses 4:2:0 subsampling - private const JpegSubsample EncodingSubsampling = JpegSubsample.Ratio420; - // System.Drawing private SDImage bmpDrawing; private Stream bmpStream; @@ -29,7 +26,8 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg // ImageSharp private Image bmpCore; - private JpegEncoder encoder; + private JpegEncoder encoder420; + private JpegEncoder encoder444; private MemoryStream destinationStream; @@ -42,14 +40,15 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg this.bmpCore = Image.Load(this.bmpStream); this.bmpCore.Metadata.ExifProfile = null; - this.encoder = new JpegEncoder { Quality = Quality, Subsample = EncodingSubsampling }; + this.encoder420 = new JpegEncoder { Quality = this.Quality, Subsample = JpegSubsample.Ratio420 }; + this.encoder444 = new JpegEncoder { Quality = this.Quality, Subsample = JpegSubsample.Ratio444 }; this.bmpStream.Position = 0; this.bmpDrawing = SDImage.FromStream(this.bmpStream); this.jpegCodec = GetEncoder(ImageFormat.Jpeg); this.encoderParameters = new EncoderParameters(1); // Quality cast to long is necessary - this.encoderParameters.Param[0] = new EncoderParameter(Encoder.Quality, (long)Quality); + this.encoderParameters.Param[0] = new EncoderParameter(Encoder.Quality, (long)this.Quality); this.destinationStream = new 
MemoryStream(); } @@ -60,21 +59,34 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { this.bmpStream.Dispose(); this.bmpStream = null; + + this.destinationStream.Dispose(); + this.destinationStream = null; + this.bmpCore.Dispose(); this.bmpDrawing.Dispose(); + + this.encoderParameters.Dispose(); } - [Benchmark(Baseline = true, Description = "System.Drawing Jpeg")] + [Benchmark(Baseline = true, Description = "System.Drawing Jpeg 4:2:0")] public void JpegSystemDrawing() { this.bmpDrawing.Save(this.destinationStream, this.jpegCodec, this.encoderParameters); this.destinationStream.Seek(0, SeekOrigin.Begin); } - [Benchmark(Description = "ImageSharp Jpeg")] - public void JpegCore() + [Benchmark(Description = "ImageSharp Jpeg 4:2:0")] + public void JpegCore420() + { + this.bmpCore.SaveAsJpeg(this.destinationStream, this.encoder420); + this.destinationStream.Seek(0, SeekOrigin.Begin); + } + + [Benchmark(Description = "ImageSharp Jpeg 4:4:4")] + public void JpegCore444() { - this.bmpCore.SaveAsJpeg(this.destinationStream, this.encoder); + this.bmpCore.SaveAsJpeg(this.destinationStream, this.encoder444); this.destinationStream.Seek(0, SeekOrigin.Begin); } From ab8ed086c0b8c6207e050b97e7c0ca70b11482ae Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Thu, 10 Jun 2021 17:27:02 +0300 Subject: [PATCH 98/99] Updated benchmark results --- .../Codecs/Jpeg/EncodeJpeg.cs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs index 5e0a5aff3..47c6f2c7d 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs @@ -110,12 +110,21 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19042 Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores .NET Core SDK=6.0.100-preview.3.21202.5 - [Host] : .NET Core 
3.1.13 (CoreCLR 4.700.21.11102, CoreFX 4.700.21.11602), X64 RyuJIT + [Host] : .NET Core 3.1.13 (CoreCLR 4.700.21.11102, CoreFX 4.700.21.11602), X64 RyuJIT [AttachedDebugger] DefaultJob : .NET Core 3.1.13 (CoreCLR 4.700.21.11102, CoreFX 4.700.21.11602), X64 RyuJIT -| Method | Mean | Error | StdDev | Ratio | RatioSD | -|---------------------- |---------:|---------:|---------:|------:|--------:| -| 'System.Drawing Jpeg' | 39.67 ms | 0.774 ms | 0.828 ms | 1.00 | 0.00 | -| 'ImageSharp Jpeg' | 45.39 ms | 0.415 ms | 0.346 ms | 1.14 | 0.03 | +| Method | Quality | Mean | Error | StdDev | Ratio | RatioSD | +|---------------------------- |-------- |---------:|---------:|---------:|------:|--------:| +| 'System.Drawing Jpeg 4:2:0' | 75 | 30.60 ms | 0.496 ms | 0.464 ms | 1.00 | 0.00 | +| 'ImageSharp Jpeg 4:2:0' | 75 | 29.86 ms | 0.350 ms | 0.311 ms | 0.98 | 0.02 | +| 'ImageSharp Jpeg 4:4:4' | 75 | 45.36 ms | 0.899 ms | 1.036 ms | 1.48 | 0.05 | +| | | | | | | | +| 'System.Drawing Jpeg 4:2:0' | 90 | 34.05 ms | 0.669 ms | 0.687 ms | 1.00 | 0.00 | +| 'ImageSharp Jpeg 4:2:0' | 90 | 37.26 ms | 0.706 ms | 0.660 ms | 1.10 | 0.03 | +| 'ImageSharp Jpeg 4:4:4' | 90 | 52.54 ms | 0.579 ms | 0.514 ms | 1.55 | 0.04 | +| | | | | | | | +| 'System.Drawing Jpeg 4:2:0' | 100 | 39.36 ms | 0.267 ms | 0.237 ms | 1.00 | 0.00 | +| 'ImageSharp Jpeg 4:2:0' | 100 | 42.44 ms | 0.410 ms | 0.383 ms | 1.08 | 0.01 | +| 'ImageSharp Jpeg 4:4:4' | 100 | 70.88 ms | 0.508 ms | 0.450 ms | 1.80 | 0.02 | */ From 87aec89f25fa752727a2396275a50d63df9e1e15 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 11 Jun 2021 18:43:15 +1000 Subject: [PATCH 99/99] Use GreatestCommonDivisor. 
Fix #1616 --- .../Processors/Transforms/Resize/ResizeKernelMap.cs | 4 ++-- .../Processing/Processors/Transforms/ResizeKernelMapTests.cs | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs index ab6040c17..2ab1d8b5a 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs @@ -130,9 +130,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Transforms int radius = (int)TolerantMath.Ceiling(scale * sampler.Radius); // 'ratio' is a rational number. - // Multiplying it by LCM(sourceSize, destSize)/sourceSize will result in a whole number "again". + // Multiplying it by destSize/GCD(sourceSize, destSize) will result in a whole number "again". // This value is determining the length of the periods in repeating kernel map rows. - int period = Numerics.LeastCommonMultiple(sourceSize, destinationSize) / sourceSize; + int period = destinationSize / Numerics.GreatestCommonDivisor(sourceSize, destinationSize); // the center position at i == 0: double center0 = (ratio - 1) * 0.5; diff --git a/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeKernelMapTests.cs b/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeKernelMapTests.cs index f15a6242d..1d4629ccc 100644 --- a/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeKernelMapTests.cs +++ b/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeKernelMapTests.cs @@ -80,6 +80,9 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Transforms { KnownResamplers.Bicubic, 1680, 1200 }, { KnownResamplers.Box, 13, 299 }, { KnownResamplers.Lanczos5, 3032, 600 }, + + // Large number. 
https://github.com/SixLabors/ImageSharp/issues/1616 + { KnownResamplers.Bicubic, 207773, 51943 } }; public static TheoryData GeneratedImageResizeData =