From 477716c5fa88b6b6ee99de6a9fad5433ebcb5ac6 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Tue, 25 Jan 2022 14:58:25 +0300 Subject: [PATCH 01/14] Fused transpose/zigzag implementation --- .../Formats/Jpeg/Components/Block8x8F.cs | 8 +- .../FastFloatingPointDCT.Intrinsic.cs | 5 +- .../Jpeg/Components/FastFloatingPointDCT.cs | 133 +----- .../Jpeg/Components/ZigZag.Intrinsic.cs | 426 +++++++++--------- 4 files changed, 229 insertions(+), 343 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index f252864476..d7511fddac 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -280,7 +280,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } /// - /// Quantize input block, apply zig-zag ordering and store result as 16bit integers. + /// Quantize input block, transpose, apply zig-zag ordering and store as . /// /// Source block. /// Destination block. @@ -291,19 +291,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components if (Avx2.IsSupported) { MultiplyIntoInt16_Avx2(ref block, ref qt, ref dest); - ZigZag.ApplyZigZagOrderingAvx2(ref dest); + ZigZag.ApplyTransposingZigZagOrderingAvx2(ref dest); } else if (Ssse3.IsSupported) { MultiplyIntoInt16_Sse2(ref block, ref qt, ref dest); - ZigZag.ApplyZigZagOrderingSsse3(ref dest); + ZigZag.ApplyTransposingZigZagOrderingSsse3(ref dest); } else #endif { for (int i = 0; i < Size; i++) { - int idx = ZigZag.ZigZagOrder[i]; + int idx = ZigZag.TransposingOrder[i]; float quantizedVal = block[idx] * qt[idx]; quantizedVal += quantizedVal < 0 ? 
-0.5f : 0.5f; dest[i] = (short)quantizedVal; diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs index 94864005ec..8acc4b6269 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs @@ -29,11 +29,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation."); - // First pass - process rows - block.TransposeInplace(); + // First pass - process columns FDCT8x8_1D_Avx(ref block); - // Second pass - process columns + // Second pass - process rows block.TransposeInplace(); FDCT8x8_1D_Avx(ref block); diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index c27ad5b82b..e1bcff30f3 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -92,6 +92,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components tableRef = 0.125f / (tableRef * Unsafe.Add(ref multipliersRef, i)); tableRef = ref Unsafe.Add(ref tableRef, 1); } + + // Spectral macroblocks are not transposed before quantization + // Transpose is done after quantization at zig-zag stage + // so we must transpose quantization table + quantTable.TransposeInplace(); } /// @@ -133,14 +138,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } else #endif - if (Vector.IsHardwareAccelerated) { FDCT_Vector4(ref block); } - else - { - FDCT_Scalar(ref block); - } } /// @@ -217,136 +217,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } } - /// - /// Apply 2D floating point FDCT inplace using scalar operations. - /// - /// - /// Ported from libjpeg-turbo https://github.com/libjpeg-turbo/libjpeg-turbo/blob/main/jfdctflt.c. 
- /// - /// Input block. - private static void FDCT_Scalar(ref Block8x8F block) - { - const int dctSize = 8; - - float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - float tmp10, tmp11, tmp12, tmp13; - float z1, z2, z3, z4, z5, z11, z13; - - // First pass - process rows - ref float blockRef = ref Unsafe.As(ref block); - for (int ctr = 7; ctr >= 0; ctr--) - { - tmp0 = Unsafe.Add(ref blockRef, 0) + Unsafe.Add(ref blockRef, 7); - tmp7 = Unsafe.Add(ref blockRef, 0) - Unsafe.Add(ref blockRef, 7); - tmp1 = Unsafe.Add(ref blockRef, 1) + Unsafe.Add(ref blockRef, 6); - tmp6 = Unsafe.Add(ref blockRef, 1) - Unsafe.Add(ref blockRef, 6); - tmp2 = Unsafe.Add(ref blockRef, 2) + Unsafe.Add(ref blockRef, 5); - tmp5 = Unsafe.Add(ref blockRef, 2) - Unsafe.Add(ref blockRef, 5); - tmp3 = Unsafe.Add(ref blockRef, 3) + Unsafe.Add(ref blockRef, 4); - tmp4 = Unsafe.Add(ref blockRef, 3) - Unsafe.Add(ref blockRef, 4); - - // Even part - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp1 + tmp2; - tmp12 = tmp1 - tmp2; - - Unsafe.Add(ref blockRef, 0) = tmp10 + tmp11; - Unsafe.Add(ref blockRef, 4) = tmp10 - tmp11; - - z1 = (tmp12 + tmp13) * 0.707106781f; - Unsafe.Add(ref blockRef, 2) = tmp13 + z1; - Unsafe.Add(ref blockRef, 6) = tmp13 - z1; - - // Odd part - tmp10 = tmp4 + tmp5; - tmp11 = tmp5 + tmp6; - tmp12 = tmp6 + tmp7; - - z5 = (tmp10 - tmp12) * 0.382683433f; - z2 = (0.541196100f * tmp10) + z5; - z4 = (1.306562965f * tmp12) + z5; - z3 = tmp11 * 0.707106781f; - - z11 = tmp7 + z3; - z13 = tmp7 - z3; - - Unsafe.Add(ref blockRef, 5) = z13 + z2; - Unsafe.Add(ref blockRef, 3) = z13 - z2; - Unsafe.Add(ref blockRef, 1) = z11 + z4; - Unsafe.Add(ref blockRef, 7) = z11 - z4; - - blockRef = ref Unsafe.Add(ref blockRef, dctSize); - } - - // Second pass - process columns - blockRef = ref Unsafe.As(ref block); - for (int ctr = 7; ctr >= 0; ctr--) - { - tmp0 = Unsafe.Add(ref blockRef, dctSize * 0) + Unsafe.Add(ref blockRef, dctSize * 7); - tmp7 = Unsafe.Add(ref blockRef, dctSize * 0) - 
Unsafe.Add(ref blockRef, dctSize * 7); - tmp1 = Unsafe.Add(ref blockRef, dctSize * 1) + Unsafe.Add(ref blockRef, dctSize * 6); - tmp6 = Unsafe.Add(ref blockRef, dctSize * 1) - Unsafe.Add(ref blockRef, dctSize * 6); - tmp2 = Unsafe.Add(ref blockRef, dctSize * 2) + Unsafe.Add(ref blockRef, dctSize * 5); - tmp5 = Unsafe.Add(ref blockRef, dctSize * 2) - Unsafe.Add(ref blockRef, dctSize * 5); - tmp3 = Unsafe.Add(ref blockRef, dctSize * 3) + Unsafe.Add(ref blockRef, dctSize * 4); - tmp4 = Unsafe.Add(ref blockRef, dctSize * 3) - Unsafe.Add(ref blockRef, dctSize * 4); - - // Even part - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp1 + tmp2; - tmp12 = tmp1 - tmp2; - - Unsafe.Add(ref blockRef, dctSize * 0) = tmp10 + tmp11; - Unsafe.Add(ref blockRef, dctSize * 4) = tmp10 - tmp11; - - z1 = (tmp12 + tmp13) * 0.707106781f; - Unsafe.Add(ref blockRef, dctSize * 2) = tmp13 + z1; - Unsafe.Add(ref blockRef, dctSize * 6) = tmp13 - z1; - - // Odd part - tmp10 = tmp4 + tmp5; - tmp11 = tmp5 + tmp6; - tmp12 = tmp6 + tmp7; - - z5 = (tmp10 - tmp12) * 0.382683433f; - z2 = (0.541196100f * tmp10) + z5; - z4 = (1.306562965f * tmp12) + z5; - z3 = tmp11 * 0.707106781f; - - z11 = tmp7 + z3; - z13 = tmp7 - z3; - - Unsafe.Add(ref blockRef, dctSize * 5) = z13 + z2; - Unsafe.Add(ref blockRef, dctSize * 3) = z13 - z2; - Unsafe.Add(ref blockRef, dctSize * 1) = z11 + z4; - Unsafe.Add(ref blockRef, dctSize * 7) = z11 - z4; - - blockRef = ref Unsafe.Add(ref blockRef, 1); - } - } - /// /// Apply floating point FDCT inplace using API. /// - /// - /// This implementation must be called only if hardware supports 4 - /// floating point numbers vector. Otherwise explicit scalar - /// implementation is faster - /// because it does not rely on block transposition. - /// /// Input block. 
public static void FDCT_Vector4(ref Block8x8F block) { - DebugGuard.IsTrue(Vector.IsHardwareAccelerated, "Scalar implementation should be called for non-accelerated hardware."); - - // First pass - process rows - block.TransposeInplace(); + // First pass - process columns FDCT8x4_Vector4(ref block.V0L); FDCT8x4_Vector4(ref block.V0R); - // Second pass - process columns + // Second pass - process rows block.TransposeInplace(); FDCT8x4_Vector4(ref block.V0L); FDCT8x4_Vector4(ref block.V0R); diff --git a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs index 6577739c1a..e5faf97257 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs @@ -18,120 +18,138 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components #pragma warning restore SA1309 /// - /// Gets shuffle vectors for + /// Gets shuffle vectors for /// zig zag implementation. /// private static ReadOnlySpan SseShuffleMasks => new byte[] { - // row0 - 0, 1, 2, 3, _, _, _, _, _, _, 4, 5, 6, 7, _, _, - _, _, _, _, 0, 1, _, _, 2, 3, _, _, _, _, 4, 5, - _, _, _, _, _, _, 0, 1, _, _, _, _, _, _, _, _, - - // row1 - _, _, _, _, _, _, _, _, _, _, _, _, 8, 9, 10, 11, - 2, 3, _, _, _, _, _, _, 4, 5, _, _, _, _, _, _, - _, _, 0, 1, _, _, 2, 3, _, _, _, _, _, _, _, _, - - // row2 - _, _, _, _, _, _, 2, 3, _, _, _, _, _, _, 4, 5, - _, _, _, _, _, _, _, _, 0, 1, _, _, 2, 3, _, _, - - // row3 - _, _, _, _, _, _, 12, 13, 14, 15, _, _, _, _, _, _, - _, _, _, _, 10, 11, _, _, _, _, 12, 13, _, _, _, _, - _, _, 8, 9, _, _, _, _, _, _, _, _, 10, 11, _, _, - 6, 7, _, _, _, _, _, _, _, _, _, _, _, _, 8, 9, - - // row4 - _, _, 4, 5, _, _, _, _, _, _, _, _, 6, 7, _, _, - _, _, _, _, 2, 3, _, _, _, _, 4, 5, _, _, _, _, - _, _, _, _, _, _, 0, 1, 2, 3, _, _, _, _, _, _, - - // row5 - _, _, 12, 13, _, _, 14, 15, _, _, _, _, _, _, _, _, - 10, 11, _, _, _, _, _, _, 12, 13, _, _, _, _, _, _, 
- - // row6 - _, _, _, _, _, _, _, _, 12, 13, _, _, 14, 15, _, _, - _, _, _, _, _, _, 10, 11, _, _, _, _, _, _, 12, 13, - 4, 5, 6, 7, _, _, _, _, _, _, _, _, _, _, _, _, - - // row7 - 10, 11, _, _, _, _, 12, 13, _, _, 14, 15, _, _, _, _, - _, _, 8, 9, 10, 11, _, _, _, _, _, _, 12, 13, 14, 15 +#pragma warning disable SA1515 + /* row0 - A0 B0 A1 A2 B1 C0 D0 C1 */ + // A + 0, 1, _, _, 2, 3, 4, 5, _, _, _, _, _, _, _, _, + // B + _, _, 0, 1, _, _, _, _, 2, 3, _, _, _, _, _, _, + // C + _, _, _, _, _, _, _, _, _, _, 0, 1, _, _, 2, 3, + + /* row1 - B2 A3 A4 B3 C2 D1 E0 F0 */ + // A + _, _, 6, 7, 8, 9, _, _, _, _, _, _, _, _, _, _, + // B + 4, 5, _, _, _, _, 6, 7, _, _, _, _, _, _, _, _, + + /* row2 - E1 D2 C3 B4 A5 A6 B5 C4 */ + // A + _, _, _, _, _, _, _, _, 10, 11, 12, 13, _, _, _, _, + // B + _, _, _, _, _, _, 8, 9, _, _, _, _, 10, 11, _, _, + // C + _, _, _, _, 6, 7, _, _, _, _, _, _, _, _, 8, 9, + + /* row3 - D3 E2 F1 G0 H0 G1 F2 E3 */ + // E + _, _, 4, 5, _, _, _, _, _, _, _, _, _, _, 6, 7, + // F + _, _, _, _, 2, 3, _, _, _, _, _, _, 4, 5, _, _, + // G + _, _, _, _, _, _, 0, 1, _, _, 2, 3, _, _, _, _, + + /* row4 - D4 C5 B6 A7 B7 C6 D5 E4 */ + // B + _, _, _, _, 12, 13, _, _, 14, 15, _, _, _, _, _, _, + // C + _, _, 10, 11, _, _, _, _, _, _, 12, 13, _, _, _, _, + // D + 8, 9, _, _, _, _, _, _, _, _, _, _, 10, 11, _, _, + + /* row5 - F3 G2 H1 H2 G3 F4 E5 D6 */ + // F + 6, 7, _, _, _, _, _, _, _, _, 8, 9, _, _, _, _, + // G + _, _, 4, 5, _, _, _, _, 6, 7, _, _, _, _, _, _, + // H + _, _, _, _, 2, 3, 4, 5, _, _, _, _, _, _, _, _, + + /* row6 - C7 D7 E6 F5 G4 H3 H4 G5 */ + // G + _, _, _, _, _, _, _, _, 8, 9, _, _, _, _, 10, 11, + // H + _, _, _, _, _, _, _, _, _, _, 6, 7, 8, 9, _, _, + + /* row7 - F6 E7 F7 G6 H5 H6 G7 H7 */ + // F + 12, 13, _, _, 14, 15, _, _, _, _, _, _, _, _, _, _, + // G + _, _, _, _, _, _, 12, 13, _, _, _, _, 14, 15, _, _, + // H + _, _, _, _, _, _, _, _, 10, 11, 12, 13, _, _, 14, 15, +#pragma warning restore SA1515 }; /// - /// Gets shuffle 
vectors for + /// Gets shuffle vectors for /// zig zag implementation. /// private static ReadOnlySpan AvxShuffleMasks => new byte[] { - // 01_AB/01_EF/23_CD - cross-lane - 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, - - // 01_AB - inner-lane - 0, 1, 2, 3, 8, 9, _, _, 10, 11, 4, 5, 6, 7, 12, 13, _, _, _, _, _, _, _, _, _, _, 10, 11, 4, 5, 6, 7, - - // 01_CD/23_GH - cross-lane - 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, _, _, _, _, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, _, _, _, _, - - // 01_CD - inner-lane - _, _, _, _, _, _, 0, 1, _, _, _, _, _, _, _, _, 2, 3, 8, 9, _, _, 10, 11, 4, 5, _, _, _, _, _, _, - - // 01_EF - inner-lane - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 0, 1, _, _, _, _, _, _, _, _, _, _, - - // 23_AB/45_CD/67_EF - cross-lane - 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, _, _, _, _, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, _, _, _, _, - - // 23_AB - inner-lane - 4, 5, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 6, 7, 0, 1, 2, 3, 8, 9, _, _, _, _, - - // 23_CD - inner-lane - _, _, 6, 7, 12, 13, _, _, _, _, _, _, _, _, _, _, 10, 11, 4, 5, _, _, _, _, _, _, _, _, 6, 7, 12, 13, - - // 23_EF - inner-lane - _, _, _, _, _, _, 2, 3, 8, 9, _, _, 10, 11, 4, 5, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - - // 23_GH - inner-lane - _, _, _, _, _, _, _, _, _, _, 0, 1, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - - // 45_AB - inner-lane - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 10, 11, _, _, _, _, _, _, _, _, _, _, - - // 45_CD - inner-lane - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 6, 7, 0, 1, _, _, 2, 3, 8, 9, _, _, _, _, _, _, - - // 45_EF - cross-lane - 1, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, _, _, _, _, 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, - - // 45_EF - inner-lane - 2, 3, 8, 9, _, _, _, _, _, _, _, _, 10, 11, 4, 5, _, _, _, _, _, _, _, _, _, _, 2, 3, 8, 9, _, _, - - // 45_GH - inner-lane - _, _, _, _, 2, 3, 8, 9, 10, 11, 4, 5, _, _, _, _, _, 
_, _, _, _, _, _, _, _, _, _, _, _, _, 6, 7, - - // 67_CD - inner-lane - _, _, _, _, _, _, _, _, _, _, 10, 11, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - - // 67_EF - inner-lane - _, _, _, _, _, _, 6, 7, 0, 1, _, _, 2, 3, 8, 9, _, _, _, _, _, _, _, _, 10, 11, _, _, _, _, _, _, - - // 67_GH - inner-lane - 8, 9, 10, 11, 4, 5, _, _, _, _, _, _, _, _, _, _, 2, 3, 8, 9, 10, 11, 4, 5, _, _, 6, 7, 12, 13, 14, 15 +#pragma warning disable SA1515 + /* 01 */ + // [cr] crln_01_AB_CD + 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, _, _, _, _, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, + // (in) AB + 0, 1, 8, 9, 2, 3, 4, 5, 10, 11, _, _, _, _, _, _, 12, 13, 2, 3, 4, 5, 14, 15, _, _, _, _, _, _, _, _, + // (in) CD + _, _, _, _, _, _, _, _, _, _, 0, 1, 8, 9, 2, 3, _, _, _, _, _, _, _, _, 0, 1, 10, 11, _, _, _, _, + // [cr] crln_01_23_EF_23_CD + 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, + // (in) EF + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 0, 1, 8, 9, + + /* 23 */ + // [cr] crln_23_AB_23_45_GH + 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, + // (in) AB + _, _, _, _, _, _, 8, 9, 2, 3, 4, 5, 10, 11, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + // (in) CDe + _, _, 12, 13, 6, 7, _, _, _, _, _, _, _, _, 8, 9, 14, 15, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + // (in) EF + 2, 3, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 4, 5, 10, 11, _, _, _, _, _, _, 12, 13, 6, 7, + // (in) GH + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 0, 1, 8, 9, 2, 3, _, _, _, _, + + /* 45 */ + // (in) AB + _, _, _, _, 12, 13, 6, 7, 14, 15, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + // [cr] crln_45_67_CD_45_EF + 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, + // (in) CD + 8, 9, 2, 3, _, _, _, _, _, _, 4, 5, 10, 11, _, _, _, _, _, _, _, _, _, 
_, _, _, _, _, _, _, 12, 13, + // (in) EF + _, _, _, _, _, _, _, _, _, _, _, _, _, _, 0, 1, 6, 7, _, _, _, _, _, _, _, _, 8, 9, 2, 3, _, _, + // (in) GH + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 4, 5, 10, 11, 12, 13, 6, 7, _, _, _, _, _, _, + + /* 67 */ + // (in) CD + 6, 7, 14, 15, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + // [cr] crln_67_EF_67_GH + 2, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, _, _, _, _, + // (in) EF + _, _, _, _, 4, 5, 14, 15, _, _, _, _, _, _, _, _, 8, 9, 2, 3, 10, 11, _, _, _, _, _, _, _, _, _, _, + // (in) GH + _, _, _, _, _, _, _, _, 0, 1, 10, 11, 12, 13, 2, 3, _, _, _, _, _, _, 0, 1, 6, 7, 8, 9, 2, 3, 10, 11, +#pragma warning restore SA1515 }; /// /// Applies zig zag ordering for given 8x8 matrix using SSE cpu intrinsics. /// /// Input matrix. - public static unsafe void ApplyZigZagOrderingSsse3(ref Block8x8 block) + public static unsafe void ApplyTransposingZigZagOrderingSsse3(ref Block8x8 block) { DebugGuard.IsTrue(Ssse3.IsSupported, "Ssse3 support is required to run this operation!"); - fixed (byte* maskPtr = SseShuffleMasks) + fixed (byte* shuffleVectorsPtr = SseShuffleMasks) { Vector128 rowA = block.V0.AsByte(); Vector128 rowB = block.V1.AsByte(); @@ -142,73 +160,69 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Vector128 rowG = block.V6.AsByte(); Vector128 rowH = block.V7.AsByte(); - // row0 - A0 A1 B0 C0 B1 A2 A3 B2 - Vector128 rowA0 = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (16 * 0))).AsInt16(); - Vector128 rowB0 = Ssse3.Shuffle(rowB, Sse2.LoadVector128(maskPtr + (16 * 1))).AsInt16(); - Vector128 row0 = Sse2.Or(rowA0, rowB0); - Vector128 rowC0 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 2))).AsInt16(); - row0 = Sse2.Or(row0, rowC0); - - // row1 - C1 D0 E0 D1 C2 B3 A4 A5 - Vector128 rowA1 = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (16 * 3))).AsInt16(); - Vector128 rowC1 = Ssse3.Shuffle(rowC, 
Sse2.LoadVector128(maskPtr + (16 * 4))).AsInt16(); - Vector128 row1 = Sse2.Or(rowA1, rowC1); - Vector128 rowD1 = Ssse3.Shuffle(rowD, Sse2.LoadVector128(maskPtr + (16 * 5))).AsInt16(); - row1 = Sse2.Or(row1, rowD1); - row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowB.AsUInt16(), 3), 5).AsInt16(); - row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 0), 2).AsInt16(); - - // row2 - Vector128 rowE2 = Ssse3.Shuffle(rowE, Sse2.LoadVector128(maskPtr + (16 * 6))).AsInt16(); - Vector128 rowF2 = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (16 * 7))).AsInt16(); - Vector128 row2 = Sse2.Or(rowE2, rowF2); - row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowB.AsUInt16(), 4), 0).AsInt16(); - row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowC.AsUInt16(), 3), 1).AsInt16(); - row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 2), 2).AsInt16(); - row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowG.AsUInt16(), 0), 5).AsInt16(); - - // row3 - Vector128 rowA3 = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (16 * 8))).AsInt16().AsInt16(); - Vector128 rowB3 = Ssse3.Shuffle(rowB, Sse2.LoadVector128(maskPtr + (16 * 9))).AsInt16().AsInt16(); - Vector128 row3 = Sse2.Or(rowA3, rowB3); - Vector128 rowC3 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 10))).AsInt16(); - row3 = Sse2.Or(row3, rowC3); - Vector128 shuffleRowD3EF = Sse2.LoadVector128(maskPtr + (16 * 11)); - Vector128 rowD3 = Ssse3.Shuffle(rowD, shuffleRowD3EF).AsInt16(); - row3 = Sse2.Or(row3, rowD3); - - // row4 - Vector128 rowE4 = Ssse3.Shuffle(rowE, shuffleRowD3EF).AsInt16(); - Vector128 rowF4 = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (16 * 12))).AsInt16(); - Vector128 row4 = Sse2.Or(rowE4, rowF4); - Vector128 rowG4 = Ssse3.Shuffle(rowG, Sse2.LoadVector128(maskPtr + (16 * 13))).AsInt16(); - row4 = Sse2.Or(row4, rowG4); - Vector128 rowH4 = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (16 * 14))).AsInt16(); - row4 = Sse2.Or(row4, rowH4); - - // row5 - Vector128 
rowC5 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 15))).AsInt16(); - Vector128 rowD5 = Ssse3.Shuffle(rowD, Sse2.LoadVector128(maskPtr + (16 * 16))).AsInt16(); - Vector128 row5 = Sse2.Or(rowC5, rowD5); - row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowB.AsUInt16(), 7), 2).AsInt16(); - row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 5), 5).AsInt16(); - row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowF.AsUInt16(), 4), 6).AsInt16(); - row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowG.AsUInt16(), 3), 7).AsInt16(); - - // row6 - Vector128 rowE6 = Ssse3.Shuffle(rowE, Sse2.LoadVector128(maskPtr + (16 * 17))).AsInt16(); - Vector128 rowF6 = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (16 * 18))).AsInt16(); - Vector128 row6 = Sse2.Or(rowE6, rowF6); - Vector128 rowH6 = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (16 * 19))).AsInt16(); - row6 = Sse2.Or(row6, rowH6); - row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 7), 5).AsInt16(); - row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowG.AsUInt16(), 4), 2).AsInt16(); - - // row7 - Vector128 rowG7 = Ssse3.Shuffle(rowG, Sse2.LoadVector128(maskPtr + (16 * 20))).AsInt16(); - Vector128 rowH7 = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (16 * 21))).AsInt16(); - Vector128 row7 = Sse2.Or(rowG7, rowH7); - row7 = Sse2.Insert(row7.AsUInt16(), Sse2.Extract(rowF.AsUInt16(), 7), 4).AsInt16(); + // row0 - A0 B0 A1 A2 B1 C0 D0 C1 + Vector128 row0_A = Ssse3.Shuffle(rowA, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 0))).AsInt16(); + Vector128 row0_B = Ssse3.Shuffle(rowB, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 1))).AsInt16(); + Vector128 row0_C = Ssse3.Shuffle(rowC, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 2))).AsInt16(); + Vector128 row0 = Sse2.Or(Sse2.Or(row0_A, row0_B), row0_C); + row0 = Sse2.Insert(row0.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 0), 6).AsInt16(); + + // row1 - B2 A3 A4 B3 C2 D1 E0 F0 + Vector128 row1_A = Ssse3.Shuffle(rowA, 
Sse2.LoadVector128(shuffleVectorsPtr + (16 * 3))).AsInt16(); + Vector128 row1_B = Ssse3.Shuffle(rowB, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 4))).AsInt16(); + Vector128 row1 = Sse2.Or(row1_A, row1_B); + row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowC.AsUInt16(), 2), 4).AsInt16(); + row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 1), 5).AsInt16(); + row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 0), 6).AsInt16(); + row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowF.AsUInt16(), 0), 7).AsInt16(); + + // row2 - E1 D2 C3 B4 A5 A6 B5 C4 + Vector128 row2_A = Ssse3.Shuffle(rowA, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 5))).AsInt16(); + Vector128 row2_B = Ssse3.Shuffle(rowB, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 6))).AsInt16(); + Vector128 row2_C = Ssse3.Shuffle(rowC, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 7))).AsInt16(); + Vector128 row2 = Sse2.Or(Sse2.Or(row2_A, row2_B), row2_C); + row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 2), 1).AsInt16(); + row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 1), 0).AsInt16(); + + // row3 - D3 E2 F1 G0 H0 G1 F2 E3 + Vector128 row3_E = Ssse3.Shuffle(rowE, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 8))).AsInt16(); + Vector128 row3_F = Ssse3.Shuffle(rowF, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 9))).AsInt16(); + Vector128 row3_G = Ssse3.Shuffle(rowG, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 10))).AsInt16(); + Vector128 row3 = Sse2.Or(Sse2.Or(row3_E, row3_F), row3_G); + row3 = Sse2.Insert(row3.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 3), 0).AsInt16(); + row3 = Sse2.Insert(row3.AsUInt16(), Sse2.Extract(rowH.AsUInt16(), 0), 4).AsInt16(); + + // row4 - D4 C5 B6 A7 B7 C6 D5 E4 + Vector128 row4_B = Ssse3.Shuffle(rowB, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 11))).AsInt16(); + Vector128 row4_C = Ssse3.Shuffle(rowC, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 12))).AsInt16(); + Vector128 row4_D = Ssse3.Shuffle(rowD, 
Sse2.LoadVector128(shuffleVectorsPtr + (16 * 13))).AsInt16(); + Vector128 row4 = Sse2.Or(Sse2.Or(row4_B, row4_C), row4_D); + row4 = Sse2.Insert(row4.AsUInt16(), Sse2.Extract(rowA.AsUInt16(), 7), 3).AsInt16(); + row4 = Sse2.Insert(row4.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 4), 7).AsInt16(); + + // row5 - F3 G2 H1 H2 G3 F4 E5 D6 + Vector128 row5_F = Ssse3.Shuffle(rowF, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 14))).AsInt16(); + Vector128 row5_G = Ssse3.Shuffle(rowG, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 15))).AsInt16(); + Vector128 row5_H = Ssse3.Shuffle(rowH, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 16))).AsInt16(); + Vector128 row5 = Sse2.Or(Sse2.Or(row5_F, row5_G), row5_H); + row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 6), 7).AsInt16(); + row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 5), 6).AsInt16(); + + // row6 - C7 D7 E6 F5 G4 H3 H4 G5 + Vector128 row6_G = Ssse3.Shuffle(rowG, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 17))).AsInt16(); + Vector128 row6_H = Ssse3.Shuffle(rowH, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 18))).AsInt16(); + Vector128 row6 = Sse2.Or(row6_G, row6_H); + row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowC.AsUInt16(), 7), 0).AsInt16(); + row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 7), 1).AsInt16(); + row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 6), 2).AsInt16(); + row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowF.AsUInt16(), 5), 3).AsInt16(); + + // row7 - F6 E7 F7 G6 H5 H6 G7 H7 + Vector128 row7_F = Ssse3.Shuffle(rowF, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 19))).AsInt16(); + Vector128 row7_G = Ssse3.Shuffle(rowG, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 20))).AsInt16(); + Vector128 row7_H = Ssse3.Shuffle(rowH, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 21))).AsInt16(); + Vector128 row7 = Sse2.Or(Sse2.Or(row7_F, row7_G), row7_H); + row7 = Sse2.Insert(row7.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 7), 
1).AsInt16(); block.V0 = row0; block.V1 = row1; @@ -225,69 +239,61 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// Applies zig zag ordering for given 8x8 matrix using AVX cpu intrinsics. /// /// Input matrix. - public static unsafe void ApplyZigZagOrderingAvx2(ref Block8x8 block) + public static unsafe void ApplyTransposingZigZagOrderingAvx2(ref Block8x8 block) { DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!"); fixed (byte* shuffleVectorsPtr = AvxShuffleMasks) { - Vector256 rowsAB = block.V01.AsByte(); - Vector256 rowsCD = block.V23.AsByte(); - Vector256 rowsEF = block.V45.AsByte(); - Vector256 rowsGH = block.V67.AsByte(); - - // rows 0 1 - Vector256 rows_AB01_EF01_CD23_shuffleMask = Avx.LoadVector256(shuffleVectorsPtr + (0 * 32)).AsInt32(); - Vector256 row01_AB = Avx2.PermuteVar8x32(rowsAB.AsInt32(), rows_AB01_EF01_CD23_shuffleMask).AsByte(); + Vector256 rowAB = block.V01.AsByte(); + Vector256 rowCD = block.V23.AsByte(); + Vector256 rowEF = block.V45.AsByte(); + Vector256 rowGH = block.V67.AsByte(); + + /* row01 - A0 B0 A1 A2 B1 C0 D0 C1 | B2 A3 A4 B3 C2 D1 E0 F0 */ + Vector256 crln_01_AB_CD = Avx.LoadVector256(shuffleVectorsPtr + (0 * 32)).AsInt32(); + Vector256 row01_AB = Avx2.PermuteVar8x32(rowAB.AsInt32(), crln_01_AB_CD).AsByte(); row01_AB = Avx2.Shuffle(row01_AB, Avx.LoadVector256(shuffleVectorsPtr + (1 * 32))).AsByte(); - - Vector256 rows_CD01_GH23_shuffleMask = Avx.LoadVector256(shuffleVectorsPtr + (2 * 32)).AsInt32(); - Vector256 row01_CD = Avx2.PermuteVar8x32(rowsCD.AsInt32(), rows_CD01_GH23_shuffleMask).AsByte(); - row01_CD = Avx2.Shuffle(row01_CD, Avx.LoadVector256(shuffleVectorsPtr + (3 * 32))).AsByte(); - - Vector256 row0123_EF = Avx2.PermuteVar8x32(rowsEF.AsInt32(), rows_AB01_EF01_CD23_shuffleMask).AsByte(); - Vector256 row01_EF = Avx2.Shuffle(row0123_EF, Avx.LoadVector256(shuffleVectorsPtr + (4 * 32))).AsByte(); - - Vector256 row01 = Avx2.Or(Avx2.Or(row01_AB, row01_CD), row01_EF); - - // rows 2 3 
- Vector256 rows_AB23_CD45_EF67_shuffleMask = Avx.LoadVector256(shuffleVectorsPtr + (5 * 32)).AsInt32(); - Vector256 row2345_AB = Avx2.PermuteVar8x32(rowsAB.AsInt32(), rows_AB23_CD45_EF67_shuffleMask).AsByte(); - Vector256 row23_AB = Avx2.Shuffle(row2345_AB, Avx.LoadVector256(shuffleVectorsPtr + (6 * 32))).AsByte(); - - Vector256 row23_CD = Avx2.PermuteVar8x32(rowsCD.AsInt32(), rows_AB01_EF01_CD23_shuffleMask).AsByte(); + Vector256 row01_CD = Avx2.PermuteVar8x32(rowCD.AsInt32(), crln_01_AB_CD).AsByte(); + row01_CD = Avx2.Shuffle(row01_CD, Avx.LoadVector256(shuffleVectorsPtr + (2 * 32))).AsByte(); + Vector256 crln_01_23_EF_23_CD = Avx.LoadVector256(shuffleVectorsPtr + (3 * 32)).AsInt32(); + Vector256 row01_23_EF = Avx2.PermuteVar8x32(rowEF.AsInt32(), crln_01_23_EF_23_CD).AsByte(); + Vector256 row01_EF = Avx2.Shuffle(row01_23_EF, Avx.LoadVector256(shuffleVectorsPtr + (4 * 32))).AsByte(); + + Vector256 row01 = Avx2.Or(row01_AB, Avx2.Or(row01_CD, row01_EF)); + + /* row23 - E1 D2 C3 B4 A5 A6 B5 C4 | D3 E2 F1 G0 H0 G1 F2 E3 */ + Vector256 crln_23_AB_23_45_GH = Avx.LoadVector256(shuffleVectorsPtr + (5 * 32)).AsInt32(); + Vector256 row23_45_AB = Avx2.PermuteVar8x32(rowAB.AsInt32(), crln_23_AB_23_45_GH).AsByte(); + Vector256 row23_AB = Avx2.Shuffle(row23_45_AB, Avx.LoadVector256(shuffleVectorsPtr + (6 * 32))).AsByte(); + Vector256 row23_CD = Avx2.PermuteVar8x32(rowCD.AsInt32(), crln_01_23_EF_23_CD).AsByte(); row23_CD = Avx2.Shuffle(row23_CD, Avx.LoadVector256(shuffleVectorsPtr + (7 * 32))).AsByte(); - - Vector256 row23_EF = Avx2.Shuffle(row0123_EF, Avx.LoadVector256(shuffleVectorsPtr + (8 * 32))).AsByte(); - - Vector256 row2345_GH = Avx2.PermuteVar8x32(rowsGH.AsInt32(), rows_CD01_GH23_shuffleMask).AsByte(); - Vector256 row23_GH = Avx2.Shuffle(row2345_GH, Avx.LoadVector256(shuffleVectorsPtr + (9 * 32)).AsByte()); + Vector256 row23_EF = Avx2.Shuffle(row01_23_EF, Avx.LoadVector256(shuffleVectorsPtr + (8 * 32))).AsByte(); + Vector256 row23_45_GH = 
Avx2.PermuteVar8x32(rowGH.AsInt32(), crln_23_AB_23_45_GH).AsByte(); + Vector256 row23_GH = Avx2.Shuffle(row23_45_GH, Avx.LoadVector256(shuffleVectorsPtr + (9 * 32))).AsByte(); Vector256 row23 = Avx2.Or(Avx2.Or(row23_AB, row23_CD), Avx2.Or(row23_EF, row23_GH)); - // rows 4 5 - Vector256 row45_AB = Avx2.Shuffle(row2345_AB, Avx.LoadVector256(shuffleVectorsPtr + (10 * 32)).AsByte()); - Vector256 row4567_CD = Avx2.PermuteVar8x32(rowsCD.AsInt32(), rows_AB23_CD45_EF67_shuffleMask).AsByte(); - Vector256 row45_CD = Avx2.Shuffle(row4567_CD, Avx.LoadVector256(shuffleVectorsPtr + (11 * 32)).AsByte()); - - Vector256 rows_EF45_GH67_shuffleMask = Avx.LoadVector256(shuffleVectorsPtr + (12 * 32)).AsInt32(); - Vector256 row45_EF = Avx2.PermuteVar8x32(rowsEF.AsInt32(), rows_EF45_GH67_shuffleMask).AsByte(); - row45_EF = Avx2.Shuffle(row45_EF, Avx.LoadVector256(shuffleVectorsPtr + (13 * 32)).AsByte()); - - Vector256 row45_GH = Avx2.Shuffle(row2345_GH, Avx.LoadVector256(shuffleVectorsPtr + (14 * 32)).AsByte()); + /* row45 - D4 C5 B6 A7 B7 C6 D5 E4 | F3 G2 H1 H2 G3 F4 E5 D6 */ + Vector256 row45_AB = Avx2.Shuffle(row23_45_AB, Avx.LoadVector256(shuffleVectorsPtr + (10 * 32))).AsByte(); + Vector256 crln_45_67_CD_45_EF = Avx.LoadVector256(shuffleVectorsPtr + (11 * 32)).AsInt32(); + Vector256 row45_67_CD = Avx2.PermuteVar8x32(rowCD.AsInt32(), crln_45_67_CD_45_EF).AsByte(); + Vector256 row45_CD = Avx2.Shuffle(row45_67_CD, Avx.LoadVector256(shuffleVectorsPtr + (12 * 32))).AsByte(); + Vector256 row45_EF = Avx2.PermuteVar8x32(rowEF.AsInt32(), crln_45_67_CD_45_EF).AsByte(); + row45_EF = Avx2.Shuffle(row45_EF, Avx.LoadVector256(shuffleVectorsPtr + (13 * 32))).AsByte(); + Vector256 row45_GH = Avx2.Shuffle(row23_45_GH, Avx.LoadVector256(shuffleVectorsPtr + (14 * 32))).AsByte(); Vector256 row45 = Avx2.Or(Avx2.Or(row45_AB, row45_CD), Avx2.Or(row45_EF, row45_GH)); - // rows 6 7 - Vector256 row67_CD = Avx2.Shuffle(row4567_CD, Avx.LoadVector256(shuffleVectorsPtr + (15 * 32)).AsByte()); - - Vector256 
row67_EF = Avx2.PermuteVar8x32(rowsEF.AsInt32(), rows_AB23_CD45_EF67_shuffleMask).AsByte(); - row67_EF = Avx2.Shuffle(row67_EF, Avx.LoadVector256(shuffleVectorsPtr + (16 * 32)).AsByte()); - - Vector256 row67_GH = Avx2.PermuteVar8x32(rowsGH.AsInt32(), rows_EF45_GH67_shuffleMask).AsByte(); - row67_GH = Avx2.Shuffle(row67_GH, Avx.LoadVector256(shuffleVectorsPtr + (17 * 32)).AsByte()); + /* row67 - C7 D7 E6 F5 G4 H3 H4 G5 | F6 E7 F7 G6 H5 H6 G7 H7 */ + Vector256 row67_CD = Avx2.Shuffle(row45_67_CD, Avx.LoadVector256(shuffleVectorsPtr + (15 * 32))).AsByte(); + Vector256 crln_67_EF_67_GH = Avx.LoadVector256(shuffleVectorsPtr + (16 * 32)).AsInt32(); + Vector256 row67_EF = Avx2.PermuteVar8x32(rowEF.AsInt32(), crln_67_EF_67_GH).AsByte(); + row67_EF = Avx2.Shuffle(row67_EF, Avx.LoadVector256(shuffleVectorsPtr + (17 * 32))).AsByte(); + Vector256 row67_GH = Avx2.PermuteVar8x32(rowGH.AsInt32(), crln_67_EF_67_GH).AsByte(); + row67_GH = Avx2.Shuffle(row67_GH, Avx.LoadVector256(shuffleVectorsPtr + (18 * 32))).AsByte(); - Vector256 row67 = Avx2.Or(Avx2.Or(row67_CD, row67_EF), row67_GH); + Vector256 row67 = Avx2.Or(row67_CD, Avx2.Or(row67_EF, row67_GH)); block.V01 = row01.AsInt16(); block.V23 = row23.AsInt16(); From 219119aad48ddaed26f5e870aac2ec2e08c55a3a Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Tue, 25 Jan 2022 15:00:31 +0300 Subject: [PATCH 02/14] Tests fix --- tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs | 2 +- tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index ae7e81254b..9576cbd3c8 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -220,7 +220,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg // Reference implementation quantizes given block via division Block8x8 expected = default; - 
ReferenceImplementations.Quantize(ref source, ref expected, ref quant, ZigZag.ZigZagOrder); + ReferenceImplementations.Quantize(ref source, ref expected, ref quant, ZigZag.TransposingOrder); // Actual current implementation quantizes given block via multiplication // With quantization table reciprocal diff --git a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs index 36570ce55a..9c467a1cc9 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs @@ -135,10 +135,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix); srcBlock.MultiplyInPlace(ref dequantMatrix); + // testee // IDCT implementation tranforms blocks after transposition srcBlock.TransposeInplace(); - - // IDCT calculation FastFloatingPointDCT.TransformIDCT(ref srcBlock); float[] actualDest = srcBlock.ToArray(); @@ -180,7 +179,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true); // testee + // Second transpose call is done by Quantize step + // Do this manually here just to be compliant with the reference implementation FastFloatingPointDCT.TransformFDCT(ref block); + block.TransposeInplace(); // Part of the IDCT calculations is fused into the quantization step // We must multiply input block with adjusted no-quantization matrix From 69458f4f8df9e00c1e2e8e0e0166aa3577320688 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Tue, 25 Jan 2022 21:56:14 +0300 Subject: [PATCH 03/14] gfoidl len check removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Günther Foidl --- src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs
b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs index e5faf97257..ca66519eb4 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs @@ -149,7 +149,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { DebugGuard.IsTrue(Ssse3.IsSupported, "Ssse3 support is required to run this operation!"); - fixed (byte* shuffleVectorsPtr = SseShuffleMasks) + fixed (byte* shuffleVectorsPtr = &MemoryMarshal.GetReference(SseShuffleMasks) { Vector128 rowA = block.V0.AsByte(); Vector128 rowB = block.V1.AsByte(); From b6400c287b7454dfd1d0326e897984841d2b1416 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Wed, 26 Jan 2022 13:44:12 +0300 Subject: [PATCH 04/14] Fixed compilation error --- src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs index ca66519eb4..850de26c30 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs @@ -3,6 +3,7 @@ #if SUPPORTS_RUNTIME_INTRINSICS using System; +using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -149,7 +150,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { DebugGuard.IsTrue(Ssse3.IsSupported, "Ssse3 support is required to run this operation!"); - fixed (byte* shuffleVectorsPtr = &MemoryMarshal.GetReference(SseShuffleMasks) + fixed (byte* shuffleVectorsPtr = &MemoryMarshal.GetReference(SseShuffleMasks)) { Vector128 rowA = block.V0.AsByte(); Vector128 rowB = block.V1.AsByte(); @@ -243,7 +244,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!"); - fixed (byte* shuffleVectorsPtr = AvxShuffleMasks) + fixed 
(byte* shuffleVectorsPtr = &MemoryMarshal.GetReference(AvxShuffleMasks)) { Vector256 rowAB = block.V01.AsByte(); Vector256 rowCD = block.V23.AsByte(); From 96c1c725f94dcb8d5882c22af68c3f46a9d5582e Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 30 Jan 2022 18:41:56 +0100 Subject: [PATCH 05/14] Write ALPH chunk (only uncompressed for now) --- src/ImageSharp/Formats/Webp/AlphaEncoder.cs | 42 ++++++++++++++++++ .../Formats/Webp/BitWriter/BitWriterBase.cs | 43 ++++++++++++++++++- .../Formats/Webp/BitWriter/Vp8BitWriter.cs | 35 ++++++++++++--- .../Formats/Webp/Lossy/Vp8Encoder.cs | 13 ++++-- .../Formats/Webp/Lossy/YuvConversion.cs | 11 ++++- 5 files changed, 133 insertions(+), 11 deletions(-) create mode 100644 src/ImageSharp/Formats/Webp/AlphaEncoder.cs diff --git a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs new file mode 100644 index 0000000000..06c114c71f --- /dev/null +++ b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs @@ -0,0 +1,42 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Buffers; +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Formats.Webp +{ + /// + /// Encodes the alpha channel data. + /// Data is either compressed as lossless webp image or uncompressed. 
+ /// + internal static class AlphaEncoder + { + public static byte[] EncodeAlpha(Image image, Configuration configuration, MemoryAllocator memoryAllocator) + where TPixel : unmanaged, IPixel + { + Buffer2D imageBuffer = image.Frames.RootFrame.PixelBuffer; + int height = image.Height; + int width = image.Width; + byte[] alphaData = new byte[width * height]; + + using IMemoryOwner rowBuffer = memoryAllocator.Allocate(width); + Span rgbaRow = rowBuffer.GetSpan(); + + for (int y = 0; y < height; y++) + { + Span rowSpan = imageBuffer.DangerousGetRowSpan(y); + PixelOperations.Instance.ToRgba32(configuration, rowSpan, rgbaRow); + int offset = y * width; + for (int x = 0; x < width; x++) + { + alphaData[offset + x] = rgbaRow[x].A; + } + } + + return alphaData; + } + } +} diff --git a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs index ac039be797..b33f7987c1 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs @@ -94,7 +94,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter /// Calculates the chunk size of EXIF or XMP metadata. /// /// The metadata profile bytes. - /// The exif chunk size in bytes. + /// The metadata chunk size in bytes. protected uint MetadataChunkSize(byte[] metadataBytes) { uint metaSize = (uint)metadataBytes.Length; @@ -103,6 +103,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter return metaChunkSize; } + /// + /// Calculates the chunk size of an alpha chunk. + /// + /// The alpha chunk bytes. + /// The alpha data chunk size in bytes. + protected uint AlphaChunkSize(byte[] alphaBytes) + { + uint alphaSize = (uint)alphaBytes.Length + 1; + uint alphaChunkSize = WebpConstants.ChunkHeaderSize + alphaSize + (alphaSize & 1); + + return alphaChunkSize; + } + /// /// Writes a metadata profile (EXIF or XMP) to the stream.
/// @@ -128,6 +141,34 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter } } + /// + /// Writes the alpha chunk to the stream. + /// + /// The stream to write to. + /// The alpha channel data bytes. + protected void WriteAlphaChunk(Stream stream, byte[] dataBytes) + { + DebugGuard.NotNull(dataBytes, nameof(dataBytes)); + + uint size = (uint)dataBytes.Length + 1; + Span buf = this.scratchBuffer.AsSpan(0, 4); + BinaryPrimitives.WriteUInt32BigEndian(buf, (uint)WebpChunkType.Alpha); + stream.Write(buf); + BinaryPrimitives.WriteUInt32LittleEndian(buf, size); + stream.Write(buf); + + // Write flags, all zero for now. + stream.WriteByte(0); + + stream.Write(dataBytes); + + // Add padding byte if needed. + if ((size & 1) == 1) + { + stream.WriteByte(0); + } + } + /// /// Writes a VP8X header to the stream. /// diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs index 4e91bedb0b..3f16fc89bc 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs @@ -409,7 +409,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter /// The width of the image. /// The height of the image. /// Flag indicating, if a alpha channel is present. - public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, XmpProfile xmpProfile, uint width, uint height, bool hasAlpha) + /// The alpha channel data. 
+ public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, XmpProfile xmpProfile, uint width, uint height, bool hasAlpha, byte[] alphaData) { bool isVp8X = false; byte[] exifBytes = null; @@ -418,7 +419,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter if (exifProfile != null) { isVp8X = true; - riffSize += ExtendedFileChunkSize; exifBytes = exifProfile.ToByteArray(); riffSize += this.MetadataChunkSize(exifBytes); } @@ -426,11 +426,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter if (xmpProfile != null) { isVp8X = true; - riffSize += ExtendedFileChunkSize; xmpBytes = xmpProfile.Data; riffSize += this.MetadataChunkSize(xmpBytes); } + if (hasAlpha) + { + isVp8X = true; + riffSize += this.AlphaChunkSize(alphaData); + } + + if (isVp8X) + { + riffSize += ExtendedFileChunkSize; + } + this.Finish(); uint numBytes = (uint)this.NumBytes(); int mbSize = this.enc.Mbw * this.enc.Mbh; @@ -451,7 +461,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter riffSize += WebpConstants.TagSize + WebpConstants.ChunkHeaderSize + vp8Size; // Emit headers and partition #0 - this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile, xmpProfile, hasAlpha); + this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile, xmpProfile, hasAlpha, alphaData); bitWriterPartZero.WriteToStream(stream); // Write the encoded image to the stream. 
@@ -639,7 +649,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter while (it.Next()); } - private void WriteWebpHeaders(Stream stream, uint size0, uint vp8Size, uint riffSize, bool isVp8X, uint width, uint height, ExifProfile exifProfile, XmpProfile xmpProfile, bool hasAlpha) + private void WriteWebpHeaders( + Stream stream, + uint size0, + uint vp8Size, + uint riffSize, + bool isVp8X, + uint width, + uint height, + ExifProfile exifProfile, + XmpProfile xmpProfile, + bool hasAlpha, + byte[] alphaData) { this.WriteRiffHeader(stream, riffSize); @@ -647,6 +668,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter if (isVp8X) { this.WriteVp8XHeader(stream, exifProfile, xmpProfile, width, height, hasAlpha); + if (hasAlpha) + { + this.WriteAlphaChunk(stream, alphaData); + } } this.WriteVp8Header(stream, vp8Size); diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 0222320502..48af53960c 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -300,7 +300,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Span y = this.Y.GetSpan(); Span u = this.U.GetSpan(); Span v = this.V.GetSpan(); - YuvConversion.ConvertRgbToYuv(image, this.configuration, this.memoryAllocator, y, u, v); + bool hasAlpha = YuvConversion.ConvertRgbToYuv(image, this.configuration, this.memoryAllocator, y, u, v); int yStride = width; int uvStride = (yStride + 1) >> 1; @@ -322,8 +322,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy int expectedSize = this.Mbw * this.Mbh * averageBytesPerMacroBlock; this.bitWriter = new Vp8BitWriter(expectedSize, this); - // TODO: EncodeAlpha(); - bool hasAlpha = false; + // Extract and encode alpha data, if present. + byte[] alphaData = null; + if (hasAlpha) + { + // TODO: This can potentially run in an separate task. 
+ alphaData = AlphaEncoder.EncodeAlpha(image, this.configuration, this.memoryAllocator); + } // Stats-collection loop. this.StatLoop(width, height, yStride, uvStride); @@ -358,7 +363,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Write bytes from the bitwriter buffer to the stream. ImageMetadata metadata = image.Metadata; metadata.SyncProfiles(); - this.bitWriter.WriteEncodedImageToStream(stream, metadata.ExifProfile, metadata.XmpProfile, (uint)width, (uint)height, hasAlpha); + this.bitWriter.WriteEncodedImageToStream(stream, metadata.ExifProfile, metadata.XmpProfile, (uint)width, (uint)height, hasAlpha, alphaData); } /// diff --git a/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs b/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs index 7a731f4284..878bebd105 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs @@ -318,7 +318,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// Span to store the luma component of the image. /// Span to store the u component of the image. /// Span to store the v component of the image. - public static void ConvertRgbToYuv(Image image, Configuration configuration, MemoryAllocator memoryAllocator, Span y, Span u, Span v) + /// true, if the image contains alpha data. 
+ public static bool ConvertRgbToYuv(Image image, Configuration configuration, MemoryAllocator memoryAllocator, Span y, Span u, Span v) where TPixel : unmanaged, IPixel { Buffer2D imageBuffer = image.Frames.RootFrame.PixelBuffer; @@ -335,6 +336,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Span bgraRow1 = bgraRow1Buffer.GetSpan(); int uvRowIndex = 0; int rowIndex; + bool hasAlpha = false; for (rowIndex = 0; rowIndex < height - 1; rowIndex += 2) { Span rowSpan = imageBuffer.DangerousGetRowSpan(rowIndex); @@ -343,6 +345,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy PixelOperations.Instance.ToBgra32(configuration, nextRowSpan, bgraRow1); bool rowsHaveAlpha = WebpCommonUtils.CheckNonOpaque(bgraRow0) && WebpCommonUtils.CheckNonOpaque(bgraRow1); + if (rowsHaveAlpha) + { + hasAlpha = true; + } // Downsample U/V planes, two rows at a time. if (!rowsHaveAlpha) @@ -375,10 +381,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy else { AccumulateRgba(bgraRow0, bgraRow0, tmpRgbSpan, width); + hasAlpha = true; } ConvertRgbaToUv(tmpRgbSpan, u.Slice(uvRowIndex * uvWidth), v.Slice(uvRowIndex * uvWidth), uvWidth); } + + return hasAlpha; } /// From 6078c0eb925a0ec02905d3bd6bfd370ae7cccbac Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Tue, 1 Feb 2022 01:50:10 +0100 Subject: [PATCH 06/14] Revert "attempt to re-enable RentReturnRelease_SubsequentRentReturnsDifferentHandles on Mac" This reverts commit bbbf687477a66843219552b445562688ee843f9b. 
--- .../Memory/Allocators/UniformUnmanagedMemoryPoolTests.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/Memory/Allocators/UniformUnmanagedMemoryPoolTests.cs b/tests/ImageSharp.Tests/Memory/Allocators/UniformUnmanagedMemoryPoolTests.cs index 4ab2c93a52..7d98eff611 100644 --- a/tests/ImageSharp.Tests/Memory/Allocators/UniformUnmanagedMemoryPoolTests.cs +++ b/tests/ImageSharp.Tests/Memory/Allocators/UniformUnmanagedMemoryPoolTests.cs @@ -245,7 +245,10 @@ namespace SixLabors.ImageSharp.Tests.Memory.Allocators cleanup.Register(b1); } - [Theory] + public static readonly bool IsNotMacOS = !TestEnvironment.IsOSX; + + // TODO: Investigate MacOS failures + [ConditionalTheory(nameof(IsNotMacOS))] [InlineData(false)] [InlineData(true)] public void RentReturnRelease_SubsequentRentReturnsDifferentHandles(bool multiple) From d1929412289d3c8bb5bc3974cc47e0b811f0e75f Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 1 Feb 2022 10:23:08 +0100 Subject: [PATCH 07/14] Add lossless alpha compression --- src/ImageSharp/Formats/Webp/AlphaEncoder.cs | 86 ++++++++++++++++++- .../Formats/Webp/BitWriter/BitWriterBase.cs | 11 ++- .../Formats/Webp/BitWriter/Vp8BitWriter.cs | 10 ++- .../Formats/Webp/Lossless/Vp8LEncoder.cs | 31 ++++++- .../Formats/Webp/Lossy/Vp8Encoder.cs | 14 +-- .../Formats/Webp/WebpEncoderCore.cs | 6 +- 6 files changed, 134 insertions(+), 24 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs index 06c114c71f..571da5bb24 100644 --- a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs +++ b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs @@ -3,18 +3,98 @@ using System; using System.Buffers; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.Formats.Webp.Lossless; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Formats.Webp { /// - /// Encodes the alpha channel data. 
- /// Data is either compressed as lossless webp image or uncompressed. + /// Methods for encoding the alpha data of a VP8 image. /// internal static class AlphaEncoder { - public static byte[] EncodeAlpha(Image image, Configuration configuration, MemoryAllocator memoryAllocator) + /// + /// Encodes the alpha channel data. + /// Data is either compressed as lossless webp image or uncompressed. + /// + /// The pixel format. + /// The to encode from. + /// The global configuration. + /// The memory manager. + /// Indicates, if the data should be compressed with the lossless webp compression. + /// The alpha data. + public static byte[] EncodeAlpha(Image image, Configuration configuration, MemoryAllocator memoryAllocator, bool compress) + where TPixel : unmanaged, IPixel + { + byte[] alphaData = ExtractAlphaChannel(image, configuration, memoryAllocator); + int width = image.Width; + int height = image.Height; + if (compress) + { + WebpEncodingMethod effort = WebpEncodingMethod.Default; + int quality = 8 * (int)effort; + using var lossLessEncoder = new Vp8LEncoder( + memoryAllocator, + configuration, + width, + height, + quality, + effort, + WebpTransparentColorMode.Preserve, + false, + 0); + + // The transparency information will be stored in the green channel of the ARGB quadruplet. + // The green channel is allowed extra transformation steps in the specification -- unlike the other channels, + // that can improve compression. + using Image alphaAsImage = DispatchAlphaToGreen(image, alphaData); + + return lossLessEncoder.EncodeAlphaImageData(alphaAsImage); + } + + return alphaData; + } + + /// + /// Store the transparency in the green channel. + /// + /// The pixel format. + /// The to encode from. + /// A byte sequence of length width * height, containing all the 8-bit transparency values in scan order. + /// The transparency image. 
+ private static Image DispatchAlphaToGreen(Image image, byte[] alphaData) + where TPixel : unmanaged, IPixel + { + int width = image.Width; + int height = image.Height; + var alphaAsImage = new Image(width, height); + + for (int y = 0; y < height; y++) + { + Memory rowBuffer = alphaAsImage.DangerousGetPixelRowMemory(y); + Span pixelRow = rowBuffer.Span; + Span alphaRow = alphaData.AsSpan(y * width, width); + for (int x = 0; x < width; x++) + { + // Leave A/R/B channels zero'd. + pixelRow[x] = new Rgba32(0, alphaRow[x], 0, 0); + } + } + + return alphaAsImage; + } + + /// + /// Extract the alpha data of the image. + /// + /// The pixel format. + /// The to encode from. + /// The global configuration. + /// The memory manager. + /// A byte sequence of length width * height, containing all the 8-bit transparency values in scan order. + private static byte[] ExtractAlphaChannel(Image image, Configuration configuration, MemoryAllocator memoryAllocator) where TPixel : unmanaged, IPixel { Buffer2D imageBuffer = image.Frames.RootFrame.PixelBuffer; diff --git a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs index b33f7987c1..84c9d3f133 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs @@ -146,7 +146,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter /// /// The stream to write to. /// The alpha channel data bytes. - protected void WriteAlphaChunk(Stream stream, byte[] dataBytes) + /// Indicates, if the alpha channel data is compressed. + protected void WriteAlphaChunk(Stream stream, byte[] dataBytes, bool alphaDataIsCompressed) { DebugGuard.NotNull(dataBytes, nameof(dataBytes)); @@ -157,9 +158,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter BinaryPrimitives.WriteUInt32LittleEndian(buf, size); stream.Write(buf); - // Write flags, all zero for now. 
- stream.WriteByte(0); + byte flags = 0; + if (alphaDataIsCompressed) + { + flags |= 1; + } + stream.WriteByte(flags); stream.Write(dataBytes); // Add padding byte if needed. diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs index 3f16fc89bc..577a87e6a1 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs @@ -410,7 +410,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter /// The height of the image. /// Flag indicating, if a alpha channel is present. /// The alpha channel data. - public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, XmpProfile xmpProfile, uint width, uint height, bool hasAlpha, byte[] alphaData) + /// Indicates, if the alpha data is compressed. + public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, XmpProfile xmpProfile, uint width, uint height, bool hasAlpha, byte[] alphaData, bool alphaDataIsCompressed) { bool isVp8X = false; byte[] exifBytes = null; @@ -461,7 +462,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter riffSize += WebpConstants.TagSize + WebpConstants.ChunkHeaderSize + vp8Size; // Emit headers and partition #0 - this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile, xmpProfile, hasAlpha, alphaData); + this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile, xmpProfile, hasAlpha, alphaData, alphaDataIsCompressed); bitWriterPartZero.WriteToStream(stream); // Write the encoded image to the stream. 
@@ -660,7 +661,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter ExifProfile exifProfile, XmpProfile xmpProfile, bool hasAlpha, - byte[] alphaData) + byte[] alphaData, + bool alphaDataIsCompressed) { this.WriteRiffHeader(stream, riffSize); @@ -670,7 +672,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter this.WriteVp8XHeader(stream, exifProfile, xmpProfile, width, height, hasAlpha); if (hasAlpha) { - this.WriteAlphaChunk(stream, alphaData); + this.WriteAlphaChunk(stream, alphaData, alphaDataIsCompressed); } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index e9dce913a3..797d0794f9 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -228,7 +228,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless public Vp8LHashChain HashChain { get; } /// - /// Encodes the image to the specified stream from the . + /// Encodes the image as lossless webp to the specified stream. /// /// The pixel format. /// The to encode from. @@ -236,10 +236,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless public void Encode(Image image, Stream stream) where TPixel : unmanaged, IPixel { - image.Metadata.SyncProfiles(); int width = image.Width; int height = image.Height; + ImageMetadata metadata = image.Metadata; + metadata.SyncProfiles(); + // Convert image pixels to bgra array. bool hasAlpha = this.ConvertPixelsToBgra(image, width, height); @@ -253,11 +255,32 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless this.EncodeStream(image); // Write bytes from the bitwriter buffer to the stream. - ImageMetadata metadata = image.Metadata; - metadata.SyncProfiles(); this.bitWriter.WriteEncodedImageToStream(stream, metadata.ExifProfile, metadata.XmpProfile, (uint)width, (uint)height, hasAlpha); } + /// + /// Encodes the alpha image data using the webp lossless compression. + /// + /// The type of the pixel. 
+ /// The to encode from. + /// The encoded alpha stream. + public byte[] EncodeAlphaImageData(Image image) + where TPixel : unmanaged, IPixel + { + int width = image.Width; + int height = image.Height; + + // Convert image pixels to bgra array. + this.ConvertPixelsToBgra(image, width, height); + + // The image-stream does NOT contain any headers describing the image dimension, the dimension is already known. + this.EncodeStream(image); + this.bitWriter.Finish(); + using var ms = new MemoryStream(); + this.bitWriter.WriteToStream(ms); + return ms.ToArray(); + } + /// /// Writes the image size to the bitwriter buffer. /// diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 48af53960c..d8bd8f759c 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -71,10 +71,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// private int uvAlpha; - /// - /// Scratch buffer to reduce allocations. - /// - private readonly int[] scratch = new int[16]; + private readonly bool alphaCompression; private readonly byte[] averageBytesPerMb = { 50, 24, 16, 9, 7, 5, 3, 2 }; @@ -105,6 +102,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// Number of entropy-analysis passes (in [1..10]). /// The filter the strength of the deblocking filter, between 0 (no filtering) and 100 (maximum filtering). /// The spatial noise shaping. 0=off, 100=maximum. + /// If true, the alpha channel will be compressed with the lossless compression. 
public Vp8Encoder( MemoryAllocator memoryAllocator, Configuration configuration, @@ -114,7 +112,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy WebpEncodingMethod method, int entropyPasses, int filterStrength, - int spatialNoiseShaping) + int spatialNoiseShaping, + bool alphaCompression) { this.memoryAllocator = memoryAllocator; this.configuration = configuration; @@ -125,6 +124,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.entropyPasses = Numerics.Clamp(entropyPasses, 1, 10); this.filterStrength = Numerics.Clamp(filterStrength, 0, 100); this.spatialNoiseShaping = Numerics.Clamp(spatialNoiseShaping, 0, 100); + this.alphaCompression = alphaCompression; this.rdOptLevel = method is WebpEncodingMethod.BestQuality ? Vp8RdLevel.RdOptTrellisAll : method >= WebpEncodingMethod.Level5 ? Vp8RdLevel.RdOptTrellis : method >= WebpEncodingMethod.Level3 ? Vp8RdLevel.RdOptBasic @@ -327,7 +327,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy if (hasAlpha) { // TODO: This can potentially run in an separate task. - alphaData = AlphaEncoder.EncodeAlpha(image, this.configuration, this.memoryAllocator); + alphaData = AlphaEncoder.EncodeAlpha(image, this.configuration, this.memoryAllocator, this.alphaCompression); } // Stats-collection loop. @@ -363,7 +363,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Write bytes from the bitwriter buffer to the stream. 
ImageMetadata metadata = image.Metadata; metadata.SyncProfiles(); - this.bitWriter.WriteEncodedImageToStream(stream, metadata.ExifProfile, metadata.XmpProfile, (uint)width, (uint)height, hasAlpha, alphaData); + this.bitWriter.WriteEncodedImageToStream(stream, metadata.ExifProfile, metadata.XmpProfile, (uint)width, (uint)height, hasAlpha, alphaData, this.alphaCompression); } /// diff --git a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs index 195fa62bdc..deed08b729 100644 --- a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs +++ b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs @@ -22,7 +22,6 @@ namespace SixLabors.ImageSharp.Formats.Webp private readonly MemoryAllocator memoryAllocator; /// - /// TODO: not used at the moment. /// Indicating whether the alpha plane should be compressed with Webp lossless format. /// private readonly bool alphaCompression; @@ -100,7 +99,7 @@ namespace SixLabors.ImageSharp.Formats.Webp } /// - /// Encodes the image to the specified stream from the . + /// Encodes the image as webp to the specified stream. /// /// The pixel format. /// The to encode from. 
@@ -149,7 +148,8 @@ namespace SixLabors.ImageSharp.Formats.Webp this.method, this.entropyPasses, this.filterStrength, - this.spatialNoiseShaping); + this.spatialNoiseShaping, + this.alphaCompression); enc.Encode(image, stream); } } From 8b8993dadc37a8a7970d4bcf26b1f043b860df60 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 1 Feb 2022 11:19:40 +0100 Subject: [PATCH 08/14] Add encode lossy webp with alpha tests --- .../Formats/WebP/WebpEncoderTests.cs | 40 +++++++++++++------ tests/ImageSharp.Tests/TestImages.cs | 1 + tests/Images/Input/Png/transparency.png | 3 ++ 3 files changed, 32 insertions(+), 12 deletions(-) create mode 100644 tests/Images/Input/Png/transparency.png diff --git a/tests/ImageSharp.Tests/Formats/WebP/WebpEncoderTests.cs b/tests/ImageSharp.Tests/Formats/WebP/WebpEncoderTests.cs index 7043549b22..7c74429edc 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/WebpEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/WebpEncoderTests.cs @@ -167,18 +167,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp image.VerifyEncoder(provider, "webp", testOutputDetails, encoder); } - [Theory] - [WithFile(TestPatternOpaque, PixelTypes.Rgba32)] - [WithFile(TestPatternOpaqueSmall, PixelTypes.Rgba32)] - public void Encode_Lossless_WorksWithTestPattern(TestImageProvider provider) - where TPixel : unmanaged, IPixel - { - using Image image = provider.GetImage(); - - var encoder = new WebpEncoder() { FileFormat = WebpFileFormatType.Lossless }; - image.VerifyEncoder(provider, "webp", string.Empty, encoder); - } - [Fact] public void Encode_Lossless_OneByOnePixel_Works() { @@ -279,6 +267,34 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp image.VerifyEncoder(provider, "webp", testOutputDetails, encoder, customComparer: GetComparer(quality)); } + [Theory] + [WithFile(TestImages.Png.Transparency, PixelTypes.Rgba32, false)] + [WithFile(TestImages.Png.Transparency, PixelTypes.Rgba32, true)] + public void Encode_Lossy_WithAlpha_Works(TestImageProvider provider, 
bool compressed) + where TPixel : unmanaged, IPixel + { + var encoder = new WebpEncoder() + { + FileFormat = WebpFileFormatType.Lossy, + UseAlphaCompression = compressed + }; + + using Image image = provider.GetImage(); + image.VerifyEncoder(provider, "webp", $"with_alpha_compressed_{compressed}", encoder, ImageComparer.Tolerant(0.04f)); + } + + [Theory] + [WithFile(TestPatternOpaque, PixelTypes.Rgba32)] + [WithFile(TestPatternOpaqueSmall, PixelTypes.Rgba32)] + public void Encode_Lossless_WorksWithTestPattern(TestImageProvider provider) + where TPixel : unmanaged, IPixel + { + using Image image = provider.GetImage(); + + var encoder = new WebpEncoder() { FileFormat = WebpFileFormatType.Lossless }; + image.VerifyEncoder(provider, "webp", string.Empty, encoder); + } + [Theory] [WithFile(TestPatternOpaque, PixelTypes.Rgba32)] [WithFile(TestPatternOpaqueSmall, PixelTypes.Rgba32)] diff --git a/tests/ImageSharp.Tests/TestImages.cs b/tests/ImageSharp.Tests/TestImages.cs index bce22799da..a73d262433 100644 --- a/tests/ImageSharp.Tests/TestImages.cs +++ b/tests/ImageSharp.Tests/TestImages.cs @@ -15,6 +15,7 @@ namespace SixLabors.ImageSharp.Tests { public static class Png { + public const string Transparency = "Png/transparency.png"; public const string P1 = "Png/pl.png"; public const string Pd = "Png/pd.png"; public const string Blur = "Png/blur.png"; diff --git a/tests/Images/Input/Png/transparency.png b/tests/Images/Input/Png/transparency.png new file mode 100644 index 0000000000..26de0f2d1a --- /dev/null +++ b/tests/Images/Input/Png/transparency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843bea4db378f52935e2f19f60d289df8ebe20ddde3977c63225f1d58a10bd62 +size 48119 From d398ae744555d87cf71da22a02ed8fb50127ff00 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 1 Feb 2022 11:25:03 +0100 Subject: [PATCH 09/14] Default alpha compression to true --- src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs | 1 + 
src/ImageSharp/Formats/Webp/WebpEncoder.cs | 2 +- src/ImageSharp/Formats/Webp/WebpEncoderCore.cs | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs b/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs index d119d3031f..57ec32753d 100644 --- a/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs +++ b/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs @@ -31,6 +31,7 @@ namespace SixLabors.ImageSharp.Formats.Webp /// /// Gets a value indicating whether the alpha plane should be compressed with Webp lossless format. + /// Defaults to true. /// bool UseAlphaCompression { get; } diff --git a/src/ImageSharp/Formats/Webp/WebpEncoder.cs b/src/ImageSharp/Formats/Webp/WebpEncoder.cs index bdcbb194b1..d0b60d18cd 100644 --- a/src/ImageSharp/Formats/Webp/WebpEncoder.cs +++ b/src/ImageSharp/Formats/Webp/WebpEncoder.cs @@ -24,7 +24,7 @@ namespace SixLabors.ImageSharp.Formats.Webp public WebpEncodingMethod Method { get; set; } = WebpEncodingMethod.Default; /// - public bool UseAlphaCompression { get; set; } + public bool UseAlphaCompression { get; set; } = true; /// public int EntropyPasses { get; set; } = 1; diff --git a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs index deed08b729..0fbff81fe4 100644 --- a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs +++ b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs @@ -23,6 +23,7 @@ namespace SixLabors.ImageSharp.Formats.Webp /// /// Indicating whether the alpha plane should be compressed with Webp lossless format. + /// Defaults to true. 
/// private readonly bool alphaCompression; From cf672b99aeaa80d734edfa0a7ca0e2e36b590526 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 1 Feb 2022 12:10:19 +0100 Subject: [PATCH 10/14] Use memory allocator for alpha data --- src/ImageSharp/Formats/Webp/AlphaEncoder.cs | 26 ++++++++++++------- .../Formats/Webp/BitWriter/BitWriterBase.cs | 12 ++++++--- .../Formats/Webp/BitWriter/Vp8BitWriter.cs | 12 +++++++-- .../Formats/Webp/Lossless/Vp8LEncoder.cs | 13 +++++----- .../Formats/Webp/Lossy/Vp8Encoder.cs | 22 +++++++++++++--- 5 files changed, 59 insertions(+), 26 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs index 571da5bb24..38497281ff 100644 --- a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs +++ b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs @@ -24,13 +24,15 @@ namespace SixLabors.ImageSharp.Formats.Webp /// The global configuration. /// The memory manager. /// Indicates, if the data should be compressed with the lossless webp compression. - /// The alpha data. - public static byte[] EncodeAlpha(Image image, Configuration configuration, MemoryAllocator memoryAllocator, bool compress) + /// The size in bytes of the alpha data. + /// The encoded alpha data. + public static IMemoryOwner EncodeAlpha(Image image, Configuration configuration, MemoryAllocator memoryAllocator, bool compress, out int size) where TPixel : unmanaged, IPixel { - byte[] alphaData = ExtractAlphaChannel(image, configuration, memoryAllocator); int width = image.Width; int height = image.Height; + IMemoryOwner alphaData = ExtractAlphaChannel(image, configuration, memoryAllocator); + if (compress) { WebpEncodingMethod effort = WebpEncodingMethod.Default; @@ -49,11 +51,14 @@ namespace SixLabors.ImageSharp.Formats.Webp // The transparency information will be stored in the green channel of the ARGB quadruplet. 
// The green channel is allowed extra transformation steps in the specification -- unlike the other channels, // that can improve compression. - using Image alphaAsImage = DispatchAlphaToGreen(image, alphaData); + using Image alphaAsImage = DispatchAlphaToGreen(image, alphaData.GetSpan()); + + size = lossLessEncoder.EncodeAlphaImageData(alphaAsImage, alphaData); - return lossLessEncoder.EncodeAlphaImageData(alphaAsImage); + return alphaData; } + size = width * height; return alphaData; } @@ -64,7 +69,7 @@ namespace SixLabors.ImageSharp.Formats.Webp /// The to encode from. /// A byte sequence of length width * height, containing all the 8-bit transparency values in scan order. /// The transparency image. - private static Image DispatchAlphaToGreen(Image image, byte[] alphaData) + private static Image DispatchAlphaToGreen(Image image, Span alphaData) where TPixel : unmanaged, IPixel { int width = image.Width; @@ -75,7 +80,7 @@ namespace SixLabors.ImageSharp.Formats.Webp { Memory rowBuffer = alphaAsImage.DangerousGetPixelRowMemory(y); Span pixelRow = rowBuffer.Span; - Span alphaRow = alphaData.AsSpan(y * width, width); + Span alphaRow = alphaData.Slice(y * width, width); for (int x = 0; x < width; x++) { // Leave A/R/B channels zero'd. @@ -94,13 +99,14 @@ namespace SixLabors.ImageSharp.Formats.Webp /// The global configuration. /// The memory manager. /// A byte sequence of length width * height, containing all the 8-bit transparency values in scan order. 
- private static byte[] ExtractAlphaChannel(Image image, Configuration configuration, MemoryAllocator memoryAllocator) + private static IMemoryOwner ExtractAlphaChannel(Image image, Configuration configuration, MemoryAllocator memoryAllocator) where TPixel : unmanaged, IPixel { Buffer2D imageBuffer = image.Frames.RootFrame.PixelBuffer; int height = image.Height; int width = image.Width; - byte[] alphaData = new byte[width * height]; + IMemoryOwner alphaDataBuffer = memoryAllocator.Allocate(width * height); + Span alphaData = alphaDataBuffer.GetSpan(); using IMemoryOwner rowBuffer = memoryAllocator.Allocate(width); Span rgbaRow = rowBuffer.GetSpan(); @@ -116,7 +122,7 @@ namespace SixLabors.ImageSharp.Formats.Webp } } - return alphaData; + return alphaDataBuffer; } } } diff --git a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs index 84c9d3f133..fc1accfdee 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs @@ -47,6 +47,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter /// The stream to write to. public void WriteToStream(Stream stream) => stream.Write(this.Buffer.AsSpan(0, this.NumBytes())); + /// + /// Writes the encoded bytes of the image to the given buffer. Call Finish() before this. + /// + /// The destination buffer. + public void WriteToBuffer(Span dest) => this.Buffer.AsSpan(0, this.NumBytes()).CopyTo(dest); + /// /// Resizes the buffer to write to. /// @@ -108,7 +114,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter /// /// The alpha chunk bytes. /// The alpha data chunk size in bytes. 
- protected uint AlphaChunkSize(byte[] alphaBytes) + protected uint AlphaChunkSize(Span alphaBytes) { uint alphaSize = (uint)alphaBytes.Length + 1; uint alphaChunkSize = WebpConstants.ChunkHeaderSize + alphaSize + (alphaSize & 1); @@ -147,10 +153,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter /// The stream to write to. /// The alpha channel data bytes. /// Indicates, if the alpha channel data is compressed. - protected void WriteAlphaChunk(Stream stream, byte[] dataBytes, bool alphaDataIsCompressed) + protected void WriteAlphaChunk(Stream stream, Span dataBytes, bool alphaDataIsCompressed) { - DebugGuard.NotNull(dataBytes, nameof(dataBytes)); - uint size = (uint)dataBytes.Length + 1; Span buf = this.scratchBuffer.AsSpan(0, 4); BinaryPrimitives.WriteUInt32BigEndian(buf, (uint)WebpChunkType.Alpha); diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs index 577a87e6a1..fa6e09d875 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs @@ -411,7 +411,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter /// Flag indicating, if a alpha channel is present. /// The alpha channel data. /// Indicates, if the alpha data is compressed. 
- public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, XmpProfile xmpProfile, uint width, uint height, bool hasAlpha, byte[] alphaData, bool alphaDataIsCompressed) + public void WriteEncodedImageToStream( + Stream stream, + ExifProfile exifProfile, + XmpProfile xmpProfile, + uint width, + uint height, + bool hasAlpha, + Span alphaData, + bool alphaDataIsCompressed) { bool isVp8X = false; byte[] exifBytes = null; @@ -661,7 +669,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter ExifProfile exifProfile, XmpProfile xmpProfile, bool hasAlpha, - byte[] alphaData, + Span alphaData, bool alphaDataIsCompressed) { this.WriteRiffHeader(stream, riffSize); diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index 797d0794f9..ece9aefd0f 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -263,8 +263,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// The type of the pixel. /// The to encode from. - /// The encoded alpha stream. - public byte[] EncodeAlphaImageData(Image image) + /// The destination buffer to write the encoded alpha data to. + /// The size of the data in bytes. + public int EncodeAlphaImageData(Image image, IMemoryOwner alphaData) where TPixel : unmanaged, IPixel { int width = image.Width; @@ -273,12 +274,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless // Convert image pixels to bgra array. this.ConvertPixelsToBgra(image, width, height); - // The image-stream does NOT contain any headers describing the image dimension, the dimension is already known. + // The image-stream will NOT contain any headers describing the image dimension, the dimension is already known. 
this.EncodeStream(image); this.bitWriter.Finish(); - using var ms = new MemoryStream(); - this.bitWriter.WriteToStream(ms); - return ms.ToArray(); + int size = this.bitWriter.NumBytes(); + this.bitWriter.WriteToBuffer(alphaData.GetSpan()); + return size; } /// diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index d8bd8f759c..4b7f3f5c88 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -241,6 +241,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public int DqUvDc { get; private set; } + /// + /// Gets or sets the alpha data. + /// + private IMemoryOwner AlphaData { get; set; } + /// /// Gets the luma component. /// @@ -322,12 +327,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy int expectedSize = this.Mbw * this.Mbh * averageBytesPerMacroBlock; this.bitWriter = new Vp8BitWriter(expectedSize, this); - // Extract and encode alpha data, if present. - byte[] alphaData = null; + // Extract and encode alpha channel data, if present. + int alphaDataSize = 0; if (hasAlpha) { // TODO: This can potentially run in an separate task. - alphaData = AlphaEncoder.EncodeAlpha(image, this.configuration, this.memoryAllocator, this.alphaCompression); + this.AlphaData = AlphaEncoder.EncodeAlpha(image, this.configuration, this.memoryAllocator, this.alphaCompression, out alphaDataSize); } // Stats-collection loop. @@ -363,7 +368,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Write bytes from the bitwriter buffer to the stream. ImageMetadata metadata = image.Metadata; metadata.SyncProfiles(); - this.bitWriter.WriteEncodedImageToStream(stream, metadata.ExifProfile, metadata.XmpProfile, (uint)width, (uint)height, hasAlpha, alphaData, this.alphaCompression); + this.bitWriter.WriteEncodedImageToStream( + stream, + metadata.ExifProfile, + metadata.XmpProfile, + (uint)width, + (uint)height, + hasAlpha, + hasAlpha ? 
this.AlphaData.GetSpan().Slice(0, alphaDataSize) : Span.Empty, + this.alphaCompression); } /// @@ -372,6 +385,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.Y.Dispose(); this.U.Dispose(); this.V.Dispose(); + this.AlphaData?.Dispose(); } /// From b12ad7596e3e1b5c91d87332f167afe340f65226 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 1 Feb 2022 12:50:28 +0100 Subject: [PATCH 11/14] Leave alpha data uncompressed, if compression does not yield in smaller data --- src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs | 11 ++++++++++- src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs | 9 ++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index ece9aefd0f..30d65562ae 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -264,12 +264,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// The type of the pixel. /// The to encode from. /// The destination buffer to write the encoded alpha data to. - /// The size of the data in bytes. + /// The size of the compressed data in bytes. + /// If the size of the data is the same as the pixel count, the compression would not yield in smaller data and is left uncompressed. + /// public int EncodeAlphaImageData(Image image, IMemoryOwner alphaData) where TPixel : unmanaged, IPixel { int width = image.Width; int height = image.Height; + int pixelCount = width * height; // Convert image pixels to bgra array. this.ConvertPixelsToBgra(image, width, height); @@ -278,6 +281,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless this.EncodeStream(image); this.bitWriter.Finish(); int size = this.bitWriter.NumBytes(); + if (size >= pixelCount) + { + // Compressing would not yield in smaller data -> leave the data uncompressed. 
+ return pixelCount; + } + this.bitWriter.WriteToBuffer(alphaData.GetSpan()); return size; } diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 4b7f3f5c88..60bdee362c 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -302,6 +302,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { int width = image.Width; int height = image.Height; + int pixelCount = width * height; Span y = this.Y.GetSpan(); Span u = this.U.GetSpan(); Span v = this.V.GetSpan(); @@ -329,10 +330,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Extract and encode alpha channel data, if present. int alphaDataSize = 0; + bool alphaCompressionSucceeded = false; if (hasAlpha) { // TODO: This can potentially run in an separate task. this.AlphaData = AlphaEncoder.EncodeAlpha(image, this.configuration, this.memoryAllocator, this.alphaCompression, out alphaDataSize); + if (alphaDataSize < pixelCount) + { + // Only use compressed data, if the compressed data is actually smaller then the uncompressed data. + alphaCompressionSucceeded = true; + } } // Stats-collection loop. @@ -376,7 +383,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy (uint)height, hasAlpha, hasAlpha ? 
this.AlphaData.GetSpan().Slice(0, alphaDataSize) : Span.Empty, - this.alphaCompression); + this.alphaCompression && alphaCompressionSucceeded); } /// From 192cfb03f9a35282eb62f99aa42c3bae91869181 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 1 Feb 2022 13:33:04 +0100 Subject: [PATCH 12/14] Move disposing the alpha data to the AlphaEncoder --- src/ImageSharp/Formats/Webp/AlphaEncoder.cs | 19 ++++++++++++------- .../Formats/Webp/Lossy/Vp8Encoder.cs | 13 +++++-------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs index 38497281ff..1019073d87 100644 --- a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs +++ b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs @@ -13,8 +13,10 @@ namespace SixLabors.ImageSharp.Formats.Webp /// /// Methods for encoding the alpha data of a VP8 image. /// - internal static class AlphaEncoder + internal class AlphaEncoder : IDisposable { + private IMemoryOwner alphaData; + /// /// Encodes the alpha channel data. /// Data is either compressed as lossless webp image or uncompressed. @@ -26,12 +28,12 @@ namespace SixLabors.ImageSharp.Formats.Webp /// Indicates, if the data should be compressed with the lossless webp compression. /// The size in bytes of the alpha data. /// The encoded alpha data. 
- public static IMemoryOwner EncodeAlpha(Image image, Configuration configuration, MemoryAllocator memoryAllocator, bool compress, out int size) + public IMemoryOwner EncodeAlpha(Image image, Configuration configuration, MemoryAllocator memoryAllocator, bool compress, out int size) where TPixel : unmanaged, IPixel { int width = image.Width; int height = image.Height; - IMemoryOwner alphaData = ExtractAlphaChannel(image, configuration, memoryAllocator); + this.alphaData = ExtractAlphaChannel(image, configuration, memoryAllocator); if (compress) { @@ -51,15 +53,15 @@ namespace SixLabors.ImageSharp.Formats.Webp // The transparency information will be stored in the green channel of the ARGB quadruplet. // The green channel is allowed extra transformation steps in the specification -- unlike the other channels, // that can improve compression. - using Image alphaAsImage = DispatchAlphaToGreen(image, alphaData.GetSpan()); + using Image alphaAsImage = DispatchAlphaToGreen(image, this.alphaData.GetSpan()); - size = lossLessEncoder.EncodeAlphaImageData(alphaAsImage, alphaData); + size = lossLessEncoder.EncodeAlphaImageData(alphaAsImage, this.alphaData); - return alphaData; + return this.alphaData; } size = width * height; - return alphaData; + return this.alphaData; } /// @@ -124,5 +126,8 @@ namespace SixLabors.ImageSharp.Formats.Webp return alphaDataBuffer; } + + /// + public void Dispose() => this.alphaData?.Dispose(); } } diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 60bdee362c..927b04c0cf 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -241,11 +241,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public int DqUvDc { get; private set; } - /// - /// Gets or sets the alpha data. - /// - private IMemoryOwner AlphaData { get; set; } - /// /// Gets the luma component. 
/// @@ -331,10 +326,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Extract and encode alpha channel data, if present. int alphaDataSize = 0; bool alphaCompressionSucceeded = false; + using var alphaEncoder = new AlphaEncoder(); + Span alphaData = Span.Empty; if (hasAlpha) { // TODO: This can potentially run in an separate task. - this.AlphaData = AlphaEncoder.EncodeAlpha(image, this.configuration, this.memoryAllocator, this.alphaCompression, out alphaDataSize); + IMemoryOwner encodedAlphaData = alphaEncoder.EncodeAlpha(image, this.configuration, this.memoryAllocator, this.alphaCompression, out alphaDataSize); + alphaData = encodedAlphaData.GetSpan(); if (alphaDataSize < pixelCount) { // Only use compressed data, if the compressed data is actually smaller then the uncompressed data. @@ -382,7 +380,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy (uint)width, (uint)height, hasAlpha, - hasAlpha ? this.AlphaData.GetSpan().Slice(0, alphaDataSize) : Span.Empty, + alphaData, this.alphaCompression && alphaCompressionSucceeded); } @@ -392,7 +390,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.Y.Dispose(); this.U.Dispose(); this.V.Dispose(); - this.AlphaData?.Dispose(); } /// From 2491b6ab626f4329a40b9363e4058d5857d8567d Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 1 Feb 2022 16:16:35 +0100 Subject: [PATCH 13/14] Change AverageBytesPerMb to ReadOnlySpan --- src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 927b04c0cf..695359e5ea 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -73,8 +73,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy private readonly bool alphaCompression; - private readonly byte[] averageBytesPerMb = { 50, 24, 16, 9, 7, 5, 3, 2 }; - private const int NumMbSegments = 4; 
private const int MaxItersKMeans = 6; @@ -174,6 +172,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.ResetBoundaryPredictions(); } + // This uses C#'s optimization to refer to the static data segment of the assembly, no allocation occurs. + private static ReadOnlySpan AverageBytesPerMb => new byte[] { 50, 24, 16, 9, 7, 5, 3, 2 }; + public int BaseQuant { get; set; } /// @@ -319,7 +320,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.SetLoopParams(this.quality); // Initialize the bitwriter. - int averageBytesPerMacroBlock = this.averageBytesPerMb[this.BaseQuant >> 4]; + int averageBytesPerMacroBlock = AverageBytesPerMb[this.BaseQuant >> 4]; int expectedSize = this.Mbw * this.Mbh * averageBytesPerMacroBlock; this.bitWriter = new Vp8BitWriter(expectedSize, this); From 85cd83f3a7606de96b0d185c3497143644366758 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Wed, 2 Feb 2022 16:22:58 +0300 Subject: [PATCH 14/14] IDisposable JpegDecoderCore is now properly disposed in tests --- .../Codecs/Jpeg/DecodeJpegParseStreamOnly.cs | 3 +-- tests/ImageSharp.Tests/Formats/Jpg/SpectralJpegTests.cs | 4 ++-- .../Formats/Jpg/SpectralToPixelConversionTests.cs | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpegParseStreamOnly.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpegParseStreamOnly.cs index 9db666c374..988c056608 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpegParseStreamOnly.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpegParseStreamOnly.cs @@ -39,10 +39,9 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg using var memoryStream = new MemoryStream(this.jpegBytes); using var bufferedStream = new BufferedReadStream(Configuration.Default, memoryStream); - var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder { IgnoreMetadata = true }); + using var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder { IgnoreMetadata = 
true }); var scanDecoder = new HuffmanScanDecoder(bufferedStream, new NoopSpectralConverter(), cancellationToken: default); decoder.ParseStream(bufferedStream, scanDecoder, cancellationToken: default); - decoder.Dispose(); } // We want to test only stream parsing and scan decoding, we don't need to convert spectral data to actual pixels diff --git a/tests/ImageSharp.Tests/Formats/Jpg/SpectralJpegTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/SpectralJpegTests.cs index 35113f14ff..3833b419c4 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/SpectralJpegTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/SpectralJpegTests.cs @@ -50,7 +50,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg // Calculating data from ImageSharp byte[] sourceBytes = TestFile.Create(provider.SourceFileOrDescription).Bytes; - var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder()); + using var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder()); using var ms = new MemoryStream(sourceBytes); using var bufferedStream = new BufferedReadStream(Configuration.Default, ms); @@ -79,7 +79,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg // Calculating data from ImageSharp byte[] sourceBytes = TestFile.Create(provider.SourceFileOrDescription).Bytes; - var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder()); + using var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder()); using var ms = new MemoryStream(sourceBytes); using var bufferedStream = new BufferedReadStream(Configuration.Default, ms); diff --git a/tests/ImageSharp.Tests/Formats/Jpg/SpectralToPixelConversionTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/SpectralToPixelConversionTests.cs index 0071c623c6..27240831c3 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/SpectralToPixelConversionTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/SpectralToPixelConversionTests.cs @@ -45,7 +45,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg // Decoding using var 
converter = new SpectralConverter(Configuration.Default); - var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder()); + using var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder()); var scanDecoder = new HuffmanScanDecoder(bufferedStream, converter, cancellationToken: default); decoder.ParseStream(bufferedStream, scanDecoder, cancellationToken: default);