diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
index f252864476..d7511fddac 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
@@ -280,7 +280,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
///
- /// Quantize input block, apply zig-zag ordering and store result as 16bit integers.
+ /// Quantize input block, transpose, apply zig-zag ordering and store as <see cref="Block8x8"/>.
///
/// Source block.
/// Destination block.
@@ -291,19 +291,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
if (Avx2.IsSupported)
{
MultiplyIntoInt16_Avx2(ref block, ref qt, ref dest);
- ZigZag.ApplyZigZagOrderingAvx2(ref dest);
+ ZigZag.ApplyTransposingZigZagOrderingAvx2(ref dest);
}
else if (Ssse3.IsSupported)
{
MultiplyIntoInt16_Sse2(ref block, ref qt, ref dest);
- ZigZag.ApplyZigZagOrderingSsse3(ref dest);
+ ZigZag.ApplyTransposingZigZagOrderingSsse3(ref dest);
}
else
#endif
{
for (int i = 0; i < Size; i++)
{
- int idx = ZigZag.ZigZagOrder[i];
+ int idx = ZigZag.TransposingOrder[i];
float quantizedVal = block[idx] * qt[idx];
quantizedVal += quantizedVal < 0 ? -0.5f : 0.5f;
dest[i] = (short)quantizedVal;
diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs
index 94864005ec..8acc4b6269 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs
@@ -29,11 +29,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation.");
- // First pass - process rows
- block.TransposeInplace();
+ // First pass - process columns
FDCT8x8_1D_Avx(ref block);
- // Second pass - process columns
+ // Second pass - process rows
block.TransposeInplace();
FDCT8x8_1D_Avx(ref block);
diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
index c27ad5b82b..e1bcff30f3 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
@@ -92,6 +92,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
tableRef = 0.125f / (tableRef * Unsafe.Add(ref multipliersRef, i));
tableRef = ref Unsafe.Add(ref tableRef, 1);
}
+
+ // Spectral macroblocks are not transposed before quantization.
+ // The transpose is done after quantization, at the zig-zag stage,
+ // so the quantization table must be transposed here.
+ quantTable.TransposeInplace();
}
///
@@ -133,14 +138,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
else
#endif
- if (Vector.IsHardwareAccelerated)
{
FDCT_Vector4(ref block);
}
- else
- {
- FDCT_Scalar(ref block);
- }
}
///
@@ -217,136 +217,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
}
- ///
- /// Apply 2D floating point FDCT inplace using scalar operations.
- ///
- ///
- /// Ported from libjpeg-turbo https://github.com/libjpeg-turbo/libjpeg-turbo/blob/main/jfdctflt.c.
- ///
- /// Input block.
- private static void FDCT_Scalar(ref Block8x8F block)
- {
- const int dctSize = 8;
-
- float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- float tmp10, tmp11, tmp12, tmp13;
- float z1, z2, z3, z4, z5, z11, z13;
-
- // First pass - process rows
- ref float blockRef = ref Unsafe.As(ref block);
- for (int ctr = 7; ctr >= 0; ctr--)
- {
- tmp0 = Unsafe.Add(ref blockRef, 0) + Unsafe.Add(ref blockRef, 7);
- tmp7 = Unsafe.Add(ref blockRef, 0) - Unsafe.Add(ref blockRef, 7);
- tmp1 = Unsafe.Add(ref blockRef, 1) + Unsafe.Add(ref blockRef, 6);
- tmp6 = Unsafe.Add(ref blockRef, 1) - Unsafe.Add(ref blockRef, 6);
- tmp2 = Unsafe.Add(ref blockRef, 2) + Unsafe.Add(ref blockRef, 5);
- tmp5 = Unsafe.Add(ref blockRef, 2) - Unsafe.Add(ref blockRef, 5);
- tmp3 = Unsafe.Add(ref blockRef, 3) + Unsafe.Add(ref blockRef, 4);
- tmp4 = Unsafe.Add(ref blockRef, 3) - Unsafe.Add(ref blockRef, 4);
-
- // Even part
- tmp10 = tmp0 + tmp3;
- tmp13 = tmp0 - tmp3;
- tmp11 = tmp1 + tmp2;
- tmp12 = tmp1 - tmp2;
-
- Unsafe.Add(ref blockRef, 0) = tmp10 + tmp11;
- Unsafe.Add(ref blockRef, 4) = tmp10 - tmp11;
-
- z1 = (tmp12 + tmp13) * 0.707106781f;
- Unsafe.Add(ref blockRef, 2) = tmp13 + z1;
- Unsafe.Add(ref blockRef, 6) = tmp13 - z1;
-
- // Odd part
- tmp10 = tmp4 + tmp5;
- tmp11 = tmp5 + tmp6;
- tmp12 = tmp6 + tmp7;
-
- z5 = (tmp10 - tmp12) * 0.382683433f;
- z2 = (0.541196100f * tmp10) + z5;
- z4 = (1.306562965f * tmp12) + z5;
- z3 = tmp11 * 0.707106781f;
-
- z11 = tmp7 + z3;
- z13 = tmp7 - z3;
-
- Unsafe.Add(ref blockRef, 5) = z13 + z2;
- Unsafe.Add(ref blockRef, 3) = z13 - z2;
- Unsafe.Add(ref blockRef, 1) = z11 + z4;
- Unsafe.Add(ref blockRef, 7) = z11 - z4;
-
- blockRef = ref Unsafe.Add(ref blockRef, dctSize);
- }
-
- // Second pass - process columns
- blockRef = ref Unsafe.As(ref block);
- for (int ctr = 7; ctr >= 0; ctr--)
- {
- tmp0 = Unsafe.Add(ref blockRef, dctSize * 0) + Unsafe.Add(ref blockRef, dctSize * 7);
- tmp7 = Unsafe.Add(ref blockRef, dctSize * 0) - Unsafe.Add(ref blockRef, dctSize * 7);
- tmp1 = Unsafe.Add(ref blockRef, dctSize * 1) + Unsafe.Add(ref blockRef, dctSize * 6);
- tmp6 = Unsafe.Add(ref blockRef, dctSize * 1) - Unsafe.Add(ref blockRef, dctSize * 6);
- tmp2 = Unsafe.Add(ref blockRef, dctSize * 2) + Unsafe.Add(ref blockRef, dctSize * 5);
- tmp5 = Unsafe.Add(ref blockRef, dctSize * 2) - Unsafe.Add(ref blockRef, dctSize * 5);
- tmp3 = Unsafe.Add(ref blockRef, dctSize * 3) + Unsafe.Add(ref blockRef, dctSize * 4);
- tmp4 = Unsafe.Add(ref blockRef, dctSize * 3) - Unsafe.Add(ref blockRef, dctSize * 4);
-
- // Even part
- tmp10 = tmp0 + tmp3;
- tmp13 = tmp0 - tmp3;
- tmp11 = tmp1 + tmp2;
- tmp12 = tmp1 - tmp2;
-
- Unsafe.Add(ref blockRef, dctSize * 0) = tmp10 + tmp11;
- Unsafe.Add(ref blockRef, dctSize * 4) = tmp10 - tmp11;
-
- z1 = (tmp12 + tmp13) * 0.707106781f;
- Unsafe.Add(ref blockRef, dctSize * 2) = tmp13 + z1;
- Unsafe.Add(ref blockRef, dctSize * 6) = tmp13 - z1;
-
- // Odd part
- tmp10 = tmp4 + tmp5;
- tmp11 = tmp5 + tmp6;
- tmp12 = tmp6 + tmp7;
-
- z5 = (tmp10 - tmp12) * 0.382683433f;
- z2 = (0.541196100f * tmp10) + z5;
- z4 = (1.306562965f * tmp12) + z5;
- z3 = tmp11 * 0.707106781f;
-
- z11 = tmp7 + z3;
- z13 = tmp7 - z3;
-
- Unsafe.Add(ref blockRef, dctSize * 5) = z13 + z2;
- Unsafe.Add(ref blockRef, dctSize * 3) = z13 - z2;
- Unsafe.Add(ref blockRef, dctSize * 1) = z11 + z4;
- Unsafe.Add(ref blockRef, dctSize * 7) = z11 - z4;
-
- blockRef = ref Unsafe.Add(ref blockRef, 1);
- }
- }
-
///
/// Apply floating point FDCT inplace using API.
///
- ///
- /// This implementation must be called only if hardware supports 4
- /// floating point numbers vector. Otherwise explicit scalar
- /// implementation is faster
- /// because it does not rely on block transposition.
- ///
/// Input block.
public static void FDCT_Vector4(ref Block8x8F block)
{
- DebugGuard.IsTrue(Vector.IsHardwareAccelerated, "Scalar implementation should be called for non-accelerated hardware.");
-
- // First pass - process rows
- block.TransposeInplace();
+ // First pass - process columns
FDCT8x4_Vector4(ref block.V0L);
FDCT8x4_Vector4(ref block.V0R);
- // Second pass - process columns
+ // Second pass - process rows
block.TransposeInplace();
FDCT8x4_Vector4(ref block.V0L);
FDCT8x4_Vector4(ref block.V0R);
diff --git a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs
index 6577739c1a..850de26c30 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs
@@ -3,6 +3,7 @@
#if SUPPORTS_RUNTIME_INTRINSICS
using System;
+using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
@@ -18,120 +19,138 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
#pragma warning restore SA1309
///
- /// Gets shuffle vectors for
+ /// Gets shuffle vectors for <see cref="ApplyTransposingZigZagOrderingSsse3"/>
/// zig zag implementation.
///
private static ReadOnlySpan SseShuffleMasks => new byte[]
{
- // row0
- 0, 1, 2, 3, _, _, _, _, _, _, 4, 5, 6, 7, _, _,
- _, _, _, _, 0, 1, _, _, 2, 3, _, _, _, _, 4, 5,
- _, _, _, _, _, _, 0, 1, _, _, _, _, _, _, _, _,
-
- // row1
- _, _, _, _, _, _, _, _, _, _, _, _, 8, 9, 10, 11,
- 2, 3, _, _, _, _, _, _, 4, 5, _, _, _, _, _, _,
- _, _, 0, 1, _, _, 2, 3, _, _, _, _, _, _, _, _,
-
- // row2
- _, _, _, _, _, _, 2, 3, _, _, _, _, _, _, 4, 5,
- _, _, _, _, _, _, _, _, 0, 1, _, _, 2, 3, _, _,
-
- // row3
- _, _, _, _, _, _, 12, 13, 14, 15, _, _, _, _, _, _,
- _, _, _, _, 10, 11, _, _, _, _, 12, 13, _, _, _, _,
- _, _, 8, 9, _, _, _, _, _, _, _, _, 10, 11, _, _,
- 6, 7, _, _, _, _, _, _, _, _, _, _, _, _, 8, 9,
-
- // row4
- _, _, 4, 5, _, _, _, _, _, _, _, _, 6, 7, _, _,
- _, _, _, _, 2, 3, _, _, _, _, 4, 5, _, _, _, _,
- _, _, _, _, _, _, 0, 1, 2, 3, _, _, _, _, _, _,
-
- // row5
- _, _, 12, 13, _, _, 14, 15, _, _, _, _, _, _, _, _,
- 10, 11, _, _, _, _, _, _, 12, 13, _, _, _, _, _, _,
-
- // row6
- _, _, _, _, _, _, _, _, 12, 13, _, _, 14, 15, _, _,
- _, _, _, _, _, _, 10, 11, _, _, _, _, _, _, 12, 13,
- 4, 5, 6, 7, _, _, _, _, _, _, _, _, _, _, _, _,
-
- // row7
- 10, 11, _, _, _, _, 12, 13, _, _, 14, 15, _, _, _, _,
- _, _, 8, 9, 10, 11, _, _, _, _, _, _, 12, 13, 14, 15
+#pragma warning disable SA1515
+ /* row0 - A0 B0 A1 A2 B1 C0 D0 C1 */
+ // A
+ 0, 1, _, _, 2, 3, 4, 5, _, _, _, _, _, _, _, _,
+ // B
+ _, _, 0, 1, _, _, _, _, 2, 3, _, _, _, _, _, _,
+ // C
+ _, _, _, _, _, _, _, _, _, _, 0, 1, _, _, 2, 3,
+
+ /* row1 - B2 A3 A4 B3 C2 D1 E0 F0 */
+ // A
+ _, _, 6, 7, 8, 9, _, _, _, _, _, _, _, _, _, _,
+ // B
+ 4, 5, _, _, _, _, 6, 7, _, _, _, _, _, _, _, _,
+
+ /* row2 - E1 D2 C3 B4 A5 A6 B5 C4 */
+ // A
+ _, _, _, _, _, _, _, _, 10, 11, 12, 13, _, _, _, _,
+ // B
+ _, _, _, _, _, _, 8, 9, _, _, _, _, 10, 11, _, _,
+ // C
+ _, _, _, _, 6, 7, _, _, _, _, _, _, _, _, 8, 9,
+
+ /* row3 - D3 E2 F1 G0 H0 G1 F2 E3 */
+ // E
+ _, _, 4, 5, _, _, _, _, _, _, _, _, _, _, 6, 7,
+ // F
+ _, _, _, _, 2, 3, _, _, _, _, _, _, 4, 5, _, _,
+ // G
+ _, _, _, _, _, _, 0, 1, _, _, 2, 3, _, _, _, _,
+
+ /* row4 - D4 C5 B6 A7 B7 C6 D5 E4 */
+ // B
+ _, _, _, _, 12, 13, _, _, 14, 15, _, _, _, _, _, _,
+ // C
+ _, _, 10, 11, _, _, _, _, _, _, 12, 13, _, _, _, _,
+ // D
+ 8, 9, _, _, _, _, _, _, _, _, _, _, 10, 11, _, _,
+
+ /* row5 - F3 G2 H1 H2 G3 F4 E5 D6 */
+ // F
+ 6, 7, _, _, _, _, _, _, _, _, 8, 9, _, _, _, _,
+ // G
+ _, _, 4, 5, _, _, _, _, 6, 7, _, _, _, _, _, _,
+ // H
+ _, _, _, _, 2, 3, 4, 5, _, _, _, _, _, _, _, _,
+
+ /* row6 - C7 D7 E6 F5 G4 H3 H4 G5 */
+ // G
+ _, _, _, _, _, _, _, _, 8, 9, _, _, _, _, 10, 11,
+ // H
+ _, _, _, _, _, _, _, _, _, _, 6, 7, 8, 9, _, _,
+
+ /* row7 - F6 E7 F7 G6 H5 H6 G7 H7 */
+ // F
+ 12, 13, _, _, 14, 15, _, _, _, _, _, _, _, _, _, _,
+ // G
+ _, _, _, _, _, _, 12, 13, _, _, _, _, 14, 15, _, _,
+ // H
+ _, _, _, _, _, _, _, _, 10, 11, 12, 13, _, _, 14, 15,
+#pragma warning restore SA1515
};
///
- /// Gets shuffle vectors for
+ /// Gets shuffle vectors for <see cref="ApplyTransposingZigZagOrderingAvx2"/>
/// zig zag implementation.
///
private static ReadOnlySpan AvxShuffleMasks => new byte[]
{
- // 01_AB/01_EF/23_CD - cross-lane
- 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0,
-
- // 01_AB - inner-lane
- 0, 1, 2, 3, 8, 9, _, _, 10, 11, 4, 5, 6, 7, 12, 13, _, _, _, _, _, _, _, _, _, _, 10, 11, 4, 5, 6, 7,
-
- // 01_CD/23_GH - cross-lane
- 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, _, _, _, _, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, _, _, _, _,
-
- // 01_CD - inner-lane
- _, _, _, _, _, _, 0, 1, _, _, _, _, _, _, _, _, 2, 3, 8, 9, _, _, 10, 11, 4, 5, _, _, _, _, _, _,
-
- // 01_EF - inner-lane
- _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 0, 1, _, _, _, _, _, _, _, _, _, _,
-
- // 23_AB/45_CD/67_EF - cross-lane
- 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, _, _, _, _, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, _, _, _, _,
-
- // 23_AB - inner-lane
- 4, 5, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 6, 7, 0, 1, 2, 3, 8, 9, _, _, _, _,
-
- // 23_CD - inner-lane
- _, _, 6, 7, 12, 13, _, _, _, _, _, _, _, _, _, _, 10, 11, 4, 5, _, _, _, _, _, _, _, _, 6, 7, 12, 13,
-
- // 23_EF - inner-lane
- _, _, _, _, _, _, 2, 3, 8, 9, _, _, 10, 11, 4, 5, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _,
-
- // 23_GH - inner-lane
- _, _, _, _, _, _, _, _, _, _, 0, 1, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _,
-
- // 45_AB - inner-lane
- _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 10, 11, _, _, _, _, _, _, _, _, _, _,
-
- // 45_CD - inner-lane
- _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 6, 7, 0, 1, _, _, 2, 3, 8, 9, _, _, _, _, _, _,
-
- // 45_EF - cross-lane
- 1, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, _, _, _, _, 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0,
-
- // 45_EF - inner-lane
- 2, 3, 8, 9, _, _, _, _, _, _, _, _, 10, 11, 4, 5, _, _, _, _, _, _, _, _, _, _, 2, 3, 8, 9, _, _,
-
- // 45_GH - inner-lane
- _, _, _, _, 2, 3, 8, 9, 10, 11, 4, 5, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 6, 7,
-
- // 67_CD - inner-lane
- _, _, _, _, _, _, _, _, _, _, 10, 11, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _,
-
- // 67_EF - inner-lane
- _, _, _, _, _, _, 6, 7, 0, 1, _, _, 2, 3, 8, 9, _, _, _, _, _, _, _, _, 10, 11, _, _, _, _, _, _,
-
- // 67_GH - inner-lane
- 8, 9, 10, 11, 4, 5, _, _, _, _, _, _, _, _, _, _, 2, 3, 8, 9, 10, 11, 4, 5, _, _, 6, 7, 12, 13, 14, 15
+#pragma warning disable SA1515
+ /* 01 */
+ // [cr] crln_01_AB_CD
+ 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, _, _, _, _, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0,
+ // (in) AB
+ 0, 1, 8, 9, 2, 3, 4, 5, 10, 11, _, _, _, _, _, _, 12, 13, 2, 3, 4, 5, 14, 15, _, _, _, _, _, _, _, _,
+ // (in) CD
+ _, _, _, _, _, _, _, _, _, _, 0, 1, 8, 9, 2, 3, _, _, _, _, _, _, _, _, 0, 1, 10, 11, _, _, _, _,
+ // [cr] crln_01_23_EF_23_CD
+ 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0,
+ // (in) EF
+ _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 0, 1, 8, 9,
+
+ /* 23 */
+ // [cr] crln_23_AB_23_45_GH
+ 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0,
+ // (in) AB
+ _, _, _, _, _, _, 8, 9, 2, 3, 4, 5, 10, 11, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _,
+ // (in) CDe
+ _, _, 12, 13, 6, 7, _, _, _, _, _, _, _, _, 8, 9, 14, 15, _, _, _, _, _, _, _, _, _, _, _, _, _, _,
+ // (in) EF
+ 2, 3, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 4, 5, 10, 11, _, _, _, _, _, _, 12, 13, 6, 7,
+ // (in) GH
+ _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 0, 1, 8, 9, 2, 3, _, _, _, _,
+
+ /* 45 */
+ // (in) AB
+ _, _, _, _, 12, 13, 6, 7, 14, 15, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _,
+ // [cr] crln_45_67_CD_45_EF
+ 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0,
+ // (in) CD
+ 8, 9, 2, 3, _, _, _, _, _, _, 4, 5, 10, 11, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 12, 13,
+ // (in) EF
+ _, _, _, _, _, _, _, _, _, _, _, _, _, _, 0, 1, 6, 7, _, _, _, _, _, _, _, _, 8, 9, 2, 3, _, _,
+ // (in) GH
+ _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 4, 5, 10, 11, 12, 13, 6, 7, _, _, _, _, _, _,
+
+ /* 67 */
+ // (in) CD
+ 6, 7, 14, 15, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _,
+ // [cr] crln_67_EF_67_GH
+ 2, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, _, _, _, _,
+ // (in) EF
+ _, _, _, _, 4, 5, 14, 15, _, _, _, _, _, _, _, _, 8, 9, 2, 3, 10, 11, _, _, _, _, _, _, _, _, _, _,
+ // (in) GH
+ _, _, _, _, _, _, _, _, 0, 1, 10, 11, 12, 13, 2, 3, _, _, _, _, _, _, 0, 1, 6, 7, 8, 9, 2, 3, 10, 11,
+#pragma warning restore SA1515
};
///
/// Applies zig zag ordering for given 8x8 matrix using SSE cpu intrinsics.
///
/// Input matrix.
- public static unsafe void ApplyZigZagOrderingSsse3(ref Block8x8 block)
+ public static unsafe void ApplyTransposingZigZagOrderingSsse3(ref Block8x8 block)
{
DebugGuard.IsTrue(Ssse3.IsSupported, "Ssse3 support is required to run this operation!");
- fixed (byte* maskPtr = SseShuffleMasks)
+ fixed (byte* shuffleVectorsPtr = &MemoryMarshal.GetReference(SseShuffleMasks))
{
Vector128 rowA = block.V0.AsByte();
Vector128 rowB = block.V1.AsByte();
@@ -142,73 +161,69 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
Vector128 rowG = block.V6.AsByte();
Vector128 rowH = block.V7.AsByte();
- // row0 - A0 A1 B0 C0 B1 A2 A3 B2
- Vector128 rowA0 = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (16 * 0))).AsInt16();
- Vector128 rowB0 = Ssse3.Shuffle(rowB, Sse2.LoadVector128(maskPtr + (16 * 1))).AsInt16();
- Vector128 row0 = Sse2.Or(rowA0, rowB0);
- Vector128 rowC0 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 2))).AsInt16();
- row0 = Sse2.Or(row0, rowC0);
-
- // row1 - C1 D0 E0 D1 C2 B3 A4 A5
- Vector128 rowA1 = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (16 * 3))).AsInt16();
- Vector128 rowC1 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 4))).AsInt16();
- Vector128 row1 = Sse2.Or(rowA1, rowC1);
- Vector128 rowD1 = Ssse3.Shuffle(rowD, Sse2.LoadVector128(maskPtr + (16 * 5))).AsInt16();
- row1 = Sse2.Or(row1, rowD1);
- row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowB.AsUInt16(), 3), 5).AsInt16();
- row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 0), 2).AsInt16();
-
- // row2
- Vector128 rowE2 = Ssse3.Shuffle(rowE, Sse2.LoadVector128(maskPtr + (16 * 6))).AsInt16();
- Vector128 rowF2 = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (16 * 7))).AsInt16();
- Vector128 row2 = Sse2.Or(rowE2, rowF2);
- row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowB.AsUInt16(), 4), 0).AsInt16();
- row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowC.AsUInt16(), 3), 1).AsInt16();
- row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 2), 2).AsInt16();
- row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowG.AsUInt16(), 0), 5).AsInt16();
-
- // row3
- Vector128 rowA3 = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (16 * 8))).AsInt16().AsInt16();
- Vector128 rowB3 = Ssse3.Shuffle(rowB, Sse2.LoadVector128(maskPtr + (16 * 9))).AsInt16().AsInt16();
- Vector128 row3 = Sse2.Or(rowA3, rowB3);
- Vector128 rowC3 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 10))).AsInt16();
- row3 = Sse2.Or(row3, rowC3);
- Vector128 shuffleRowD3EF = Sse2.LoadVector128(maskPtr + (16 * 11));
- Vector128 rowD3 = Ssse3.Shuffle(rowD, shuffleRowD3EF).AsInt16();
- row3 = Sse2.Or(row3, rowD3);
-
- // row4
- Vector128 rowE4 = Ssse3.Shuffle(rowE, shuffleRowD3EF).AsInt16();
- Vector128 rowF4 = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (16 * 12))).AsInt16();
- Vector128 row4 = Sse2.Or(rowE4, rowF4);
- Vector128 rowG4 = Ssse3.Shuffle(rowG, Sse2.LoadVector128(maskPtr + (16 * 13))).AsInt16();
- row4 = Sse2.Or(row4, rowG4);
- Vector128 rowH4 = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (16 * 14))).AsInt16();
- row4 = Sse2.Or(row4, rowH4);
-
- // row5
- Vector128 rowC5 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 15))).AsInt16();
- Vector128 rowD5 = Ssse3.Shuffle(rowD, Sse2.LoadVector128(maskPtr + (16 * 16))).AsInt16();
- Vector128 row5 = Sse2.Or(rowC5, rowD5);
- row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowB.AsUInt16(), 7), 2).AsInt16();
- row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 5), 5).AsInt16();
- row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowF.AsUInt16(), 4), 6).AsInt16();
- row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowG.AsUInt16(), 3), 7).AsInt16();
-
- // row6
- Vector128 rowE6 = Ssse3.Shuffle(rowE, Sse2.LoadVector128(maskPtr + (16 * 17))).AsInt16();
- Vector128 rowF6 = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (16 * 18))).AsInt16();
- Vector128 row6 = Sse2.Or(rowE6, rowF6);
- Vector128 rowH6 = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (16 * 19))).AsInt16();
- row6 = Sse2.Or(row6, rowH6);
- row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 7), 5).AsInt16();
- row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowG.AsUInt16(), 4), 2).AsInt16();
-
- // row7
- Vector128 rowG7 = Ssse3.Shuffle(rowG, Sse2.LoadVector128(maskPtr + (16 * 20))).AsInt16();
- Vector128 rowH7 = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (16 * 21))).AsInt16();
- Vector128 row7 = Sse2.Or(rowG7, rowH7);
- row7 = Sse2.Insert(row7.AsUInt16(), Sse2.Extract(rowF.AsUInt16(), 7), 4).AsInt16();
+ // row0 - A0 B0 A1 A2 B1 C0 D0 C1
+ Vector128 row0_A = Ssse3.Shuffle(rowA, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 0))).AsInt16();
+ Vector128 row0_B = Ssse3.Shuffle(rowB, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 1))).AsInt16();
+ Vector128 row0_C = Ssse3.Shuffle(rowC, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 2))).AsInt16();
+ Vector128 row0 = Sse2.Or(Sse2.Or(row0_A, row0_B), row0_C);
+ row0 = Sse2.Insert(row0.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 0), 6).AsInt16();
+
+ // row1 - B2 A3 A4 B3 C2 D1 E0 F0
+ Vector128 row1_A = Ssse3.Shuffle(rowA, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 3))).AsInt16();
+ Vector128 row1_B = Ssse3.Shuffle(rowB, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 4))).AsInt16();
+ Vector128 row1 = Sse2.Or(row1_A, row1_B);
+ row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowC.AsUInt16(), 2), 4).AsInt16();
+ row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 1), 5).AsInt16();
+ row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 0), 6).AsInt16();
+ row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowF.AsUInt16(), 0), 7).AsInt16();
+
+ // row2 - E1 D2 C3 B4 A5 A6 B5 C4
+ Vector128 row2_A = Ssse3.Shuffle(rowA, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 5))).AsInt16();
+ Vector128 row2_B = Ssse3.Shuffle(rowB, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 6))).AsInt16();
+ Vector128 row2_C = Ssse3.Shuffle(rowC, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 7))).AsInt16();
+ Vector128 row2 = Sse2.Or(Sse2.Or(row2_A, row2_B), row2_C);
+ row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 2), 1).AsInt16();
+ row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 1), 0).AsInt16();
+
+ // row3 - D3 E2 F1 G0 H0 G1 F2 E3
+ Vector128 row3_E = Ssse3.Shuffle(rowE, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 8))).AsInt16();
+ Vector128 row3_F = Ssse3.Shuffle(rowF, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 9))).AsInt16();
+ Vector128 row3_G = Ssse3.Shuffle(rowG, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 10))).AsInt16();
+ Vector128 row3 = Sse2.Or(Sse2.Or(row3_E, row3_F), row3_G);
+ row3 = Sse2.Insert(row3.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 3), 0).AsInt16();
+ row3 = Sse2.Insert(row3.AsUInt16(), Sse2.Extract(rowH.AsUInt16(), 0), 4).AsInt16();
+
+ // row4 - D4 C5 B6 A7 B7 C6 D5 E4
+ Vector128 row4_B = Ssse3.Shuffle(rowB, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 11))).AsInt16();
+ Vector128 row4_C = Ssse3.Shuffle(rowC, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 12))).AsInt16();
+ Vector128 row4_D = Ssse3.Shuffle(rowD, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 13))).AsInt16();
+ Vector128 row4 = Sse2.Or(Sse2.Or(row4_B, row4_C), row4_D);
+ row4 = Sse2.Insert(row4.AsUInt16(), Sse2.Extract(rowA.AsUInt16(), 7), 3).AsInt16();
+ row4 = Sse2.Insert(row4.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 4), 7).AsInt16();
+
+ // row5 - F3 G2 H1 H2 G3 F4 E5 D6
+ Vector128 row5_F = Ssse3.Shuffle(rowF, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 14))).AsInt16();
+ Vector128 row5_G = Ssse3.Shuffle(rowG, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 15))).AsInt16();
+ Vector128 row5_H = Ssse3.Shuffle(rowH, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 16))).AsInt16();
+ Vector128 row5 = Sse2.Or(Sse2.Or(row5_F, row5_G), row5_H);
+ row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 6), 7).AsInt16();
+ row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 5), 6).AsInt16();
+
+ // row6 - C7 D7 E6 F5 G4 H3 H4 G5
+ Vector128 row6_G = Ssse3.Shuffle(rowG, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 17))).AsInt16();
+ Vector128 row6_H = Ssse3.Shuffle(rowH, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 18))).AsInt16();
+ Vector128 row6 = Sse2.Or(row6_G, row6_H);
+ row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowC.AsUInt16(), 7), 0).AsInt16();
+ row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 7), 1).AsInt16();
+ row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 6), 2).AsInt16();
+ row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowF.AsUInt16(), 5), 3).AsInt16();
+
+ // row7 - F6 E7 F7 G6 H5 H6 G7 H7
+ Vector128 row7_F = Ssse3.Shuffle(rowF, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 19))).AsInt16();
+ Vector128 row7_G = Ssse3.Shuffle(rowG, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 20))).AsInt16();
+ Vector128 row7_H = Ssse3.Shuffle(rowH, Sse2.LoadVector128(shuffleVectorsPtr + (16 * 21))).AsInt16();
+ Vector128 row7 = Sse2.Or(Sse2.Or(row7_F, row7_G), row7_H);
+ row7 = Sse2.Insert(row7.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 7), 1).AsInt16();
block.V0 = row0;
block.V1 = row1;
@@ -225,69 +240,61 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// Applies zig zag ordering for given 8x8 matrix using AVX cpu intrinsics.
///
/// Input matrix.
- public static unsafe void ApplyZigZagOrderingAvx2(ref Block8x8 block)
+ public static unsafe void ApplyTransposingZigZagOrderingAvx2(ref Block8x8 block)
{
DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");
- fixed (byte* shuffleVectorsPtr = AvxShuffleMasks)
+ fixed (byte* shuffleVectorsPtr = &MemoryMarshal.GetReference(AvxShuffleMasks))
{
- Vector256 rowsAB = block.V01.AsByte();
- Vector256 rowsCD = block.V23.AsByte();
- Vector256 rowsEF = block.V45.AsByte();
- Vector256 rowsGH = block.V67.AsByte();
-
- // rows 0 1
- Vector256 rows_AB01_EF01_CD23_shuffleMask = Avx.LoadVector256(shuffleVectorsPtr + (0 * 32)).AsInt32();
- Vector256 row01_AB = Avx2.PermuteVar8x32(rowsAB.AsInt32(), rows_AB01_EF01_CD23_shuffleMask).AsByte();
+ Vector256 rowAB = block.V01.AsByte();
+ Vector256 rowCD = block.V23.AsByte();
+ Vector256 rowEF = block.V45.AsByte();
+ Vector256 rowGH = block.V67.AsByte();
+
+ /* row01 - A0 B0 A1 A2 B1 C0 D0 C1 | B2 A3 A4 B3 C2 D1 E0 F0 */
+ Vector256 crln_01_AB_CD = Avx.LoadVector256(shuffleVectorsPtr + (0 * 32)).AsInt32();
+ Vector256 row01_AB = Avx2.PermuteVar8x32(rowAB.AsInt32(), crln_01_AB_CD).AsByte();
row01_AB = Avx2.Shuffle(row01_AB, Avx.LoadVector256(shuffleVectorsPtr + (1 * 32))).AsByte();
-
- Vector256 rows_CD01_GH23_shuffleMask = Avx.LoadVector256(shuffleVectorsPtr + (2 * 32)).AsInt32();
- Vector256 row01_CD = Avx2.PermuteVar8x32(rowsCD.AsInt32(), rows_CD01_GH23_shuffleMask).AsByte();
- row01_CD = Avx2.Shuffle(row01_CD, Avx.LoadVector256(shuffleVectorsPtr + (3 * 32))).AsByte();
-
- Vector256 row0123_EF = Avx2.PermuteVar8x32(rowsEF.AsInt32(), rows_AB01_EF01_CD23_shuffleMask).AsByte();
- Vector256 row01_EF = Avx2.Shuffle(row0123_EF, Avx.LoadVector256(shuffleVectorsPtr + (4 * 32))).AsByte();
-
- Vector256 row01 = Avx2.Or(Avx2.Or(row01_AB, row01_CD), row01_EF);
-
- // rows 2 3
- Vector256 rows_AB23_CD45_EF67_shuffleMask = Avx.LoadVector256(shuffleVectorsPtr + (5 * 32)).AsInt32();
- Vector256 row2345_AB = Avx2.PermuteVar8x32(rowsAB.AsInt32(), rows_AB23_CD45_EF67_shuffleMask).AsByte();
- Vector256 row23_AB = Avx2.Shuffle(row2345_AB, Avx.LoadVector256(shuffleVectorsPtr + (6 * 32))).AsByte();
-
- Vector256 row23_CD = Avx2.PermuteVar8x32(rowsCD.AsInt32(), rows_AB01_EF01_CD23_shuffleMask).AsByte();
+ Vector256 row01_CD = Avx2.PermuteVar8x32(rowCD.AsInt32(), crln_01_AB_CD).AsByte();
+ row01_CD = Avx2.Shuffle(row01_CD, Avx.LoadVector256(shuffleVectorsPtr + (2 * 32))).AsByte();
+ Vector256 crln_01_23_EF_23_CD = Avx.LoadVector256(shuffleVectorsPtr + (3 * 32)).AsInt32();
+ Vector256 row01_23_EF = Avx2.PermuteVar8x32(rowEF.AsInt32(), crln_01_23_EF_23_CD).AsByte();
+ Vector256 row01_EF = Avx2.Shuffle(row01_23_EF, Avx.LoadVector256(shuffleVectorsPtr + (4 * 32))).AsByte();
+
+ Vector256 row01 = Avx2.Or(row01_AB, Avx2.Or(row01_CD, row01_EF));
+
+ /* row23 - E1 D2 C3 B4 A5 A6 B5 C4 | D3 E2 F1 G0 H0 G1 F2 E3 */
+ Vector256 crln_23_AB_23_45_GH = Avx.LoadVector256(shuffleVectorsPtr + (5 * 32)).AsInt32();
+ Vector256 row23_45_AB = Avx2.PermuteVar8x32(rowAB.AsInt32(), crln_23_AB_23_45_GH).AsByte();
+ Vector256 row23_AB = Avx2.Shuffle(row23_45_AB, Avx.LoadVector256(shuffleVectorsPtr + (6 * 32))).AsByte();
+ Vector256 row23_CD = Avx2.PermuteVar8x32(rowCD.AsInt32(), crln_01_23_EF_23_CD).AsByte();
row23_CD = Avx2.Shuffle(row23_CD, Avx.LoadVector256(shuffleVectorsPtr + (7 * 32))).AsByte();
-
- Vector256 row23_EF = Avx2.Shuffle(row0123_EF, Avx.LoadVector256(shuffleVectorsPtr + (8 * 32))).AsByte();
-
- Vector256 row2345_GH = Avx2.PermuteVar8x32(rowsGH.AsInt32(), rows_CD01_GH23_shuffleMask).AsByte();
- Vector256 row23_GH = Avx2.Shuffle(row2345_GH, Avx.LoadVector256(shuffleVectorsPtr + (9 * 32)).AsByte());
+ Vector256 row23_EF = Avx2.Shuffle(row01_23_EF, Avx.LoadVector256(shuffleVectorsPtr + (8 * 32))).AsByte();
+ Vector256 row23_45_GH = Avx2.PermuteVar8x32(rowGH.AsInt32(), crln_23_AB_23_45_GH).AsByte();
+ Vector256 row23_GH = Avx2.Shuffle(row23_45_GH, Avx.LoadVector256(shuffleVectorsPtr + (9 * 32))).AsByte();
Vector256 row23 = Avx2.Or(Avx2.Or(row23_AB, row23_CD), Avx2.Or(row23_EF, row23_GH));
- // rows 4 5
- Vector256 row45_AB = Avx2.Shuffle(row2345_AB, Avx.LoadVector256(shuffleVectorsPtr + (10 * 32)).AsByte());
- Vector256 row4567_CD = Avx2.PermuteVar8x32(rowsCD.AsInt32(), rows_AB23_CD45_EF67_shuffleMask).AsByte();
- Vector256 row45_CD = Avx2.Shuffle(row4567_CD, Avx.LoadVector256(shuffleVectorsPtr + (11 * 32)).AsByte());
-
- Vector256 rows_EF45_GH67_shuffleMask = Avx.LoadVector256(shuffleVectorsPtr + (12 * 32)).AsInt32();
- Vector256 row45_EF = Avx2.PermuteVar8x32(rowsEF.AsInt32(), rows_EF45_GH67_shuffleMask).AsByte();
- row45_EF = Avx2.Shuffle(row45_EF, Avx.LoadVector256(shuffleVectorsPtr + (13 * 32)).AsByte());
-
- Vector256 row45_GH = Avx2.Shuffle(row2345_GH, Avx.LoadVector256(shuffleVectorsPtr + (14 * 32)).AsByte());
+ /* row45 - D4 C5 B6 A7 B7 C6 D5 E4 | F3 G2 H1 H2 G3 F4 E5 D6 */
+ Vector256 row45_AB = Avx2.Shuffle(row23_45_AB, Avx.LoadVector256(shuffleVectorsPtr + (10 * 32))).AsByte();
+ Vector256 crln_45_67_CD_45_EF = Avx.LoadVector256(shuffleVectorsPtr + (11 * 32)).AsInt32();
+ Vector256 row45_67_CD = Avx2.PermuteVar8x32(rowCD.AsInt32(), crln_45_67_CD_45_EF).AsByte();
+ Vector256 row45_CD = Avx2.Shuffle(row45_67_CD, Avx.LoadVector256(shuffleVectorsPtr + (12 * 32))).AsByte();
+ Vector256 row45_EF = Avx2.PermuteVar8x32(rowEF.AsInt32(), crln_45_67_CD_45_EF).AsByte();
+ row45_EF = Avx2.Shuffle(row45_EF, Avx.LoadVector256(shuffleVectorsPtr + (13 * 32))).AsByte();
+ Vector256 row45_GH = Avx2.Shuffle(row23_45_GH, Avx.LoadVector256(shuffleVectorsPtr + (14 * 32))).AsByte();
Vector256 row45 = Avx2.Or(Avx2.Or(row45_AB, row45_CD), Avx2.Or(row45_EF, row45_GH));
- // rows 6 7
- Vector256 row67_CD = Avx2.Shuffle(row4567_CD, Avx.LoadVector256(shuffleVectorsPtr + (15 * 32)).AsByte());
-
- Vector256 row67_EF = Avx2.PermuteVar8x32(rowsEF.AsInt32(), rows_AB23_CD45_EF67_shuffleMask).AsByte();
- row67_EF = Avx2.Shuffle(row67_EF, Avx.LoadVector256(shuffleVectorsPtr + (16 * 32)).AsByte());
-
- Vector256 row67_GH = Avx2.PermuteVar8x32(rowsGH.AsInt32(), rows_EF45_GH67_shuffleMask).AsByte();
- row67_GH = Avx2.Shuffle(row67_GH, Avx.LoadVector256(shuffleVectorsPtr + (17 * 32)).AsByte());
+ /* row67 - C7 D7 E6 F5 G4 H3 H4 G5 | F6 E7 F7 G6 H5 H6 G7 H7 */
+ Vector256 row67_CD = Avx2.Shuffle(row45_67_CD, Avx.LoadVector256(shuffleVectorsPtr + (15 * 32))).AsByte();
+ Vector256 crln_67_EF_67_GH = Avx.LoadVector256(shuffleVectorsPtr + (16 * 32)).AsInt32();
+ Vector256 row67_EF = Avx2.PermuteVar8x32(rowEF.AsInt32(), crln_67_EF_67_GH).AsByte();
+ row67_EF = Avx2.Shuffle(row67_EF, Avx.LoadVector256(shuffleVectorsPtr + (17 * 32))).AsByte();
+ Vector256 row67_GH = Avx2.PermuteVar8x32(rowGH.AsInt32(), crln_67_EF_67_GH).AsByte();
+ row67_GH = Avx2.Shuffle(row67_GH, Avx.LoadVector256(shuffleVectorsPtr + (18 * 32))).AsByte();
- Vector256 row67 = Avx2.Or(Avx2.Or(row67_CD, row67_EF), row67_GH);
+ Vector256 row67 = Avx2.Or(row67_CD, Avx2.Or(row67_EF, row67_GH));
block.V01 = row01.AsInt16();
block.V23 = row23.AsInt16();
diff --git a/src/ImageSharp/Formats/Webp/AlphaEncoder.cs b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs
new file mode 100644
index 0000000000..1019073d87
--- /dev/null
+++ b/src/ImageSharp/Formats/Webp/AlphaEncoder.cs
@@ -0,0 +1,133 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Buffers;
+using SixLabors.ImageSharp.Advanced;
+using SixLabors.ImageSharp.Formats.Webp.Lossless;
+using SixLabors.ImageSharp.Memory;
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Formats.Webp
+{
+ ///
+ /// Methods for encoding the alpha data of a VP8 image.
+ ///
+ internal class AlphaEncoder : IDisposable
+ {
+ private IMemoryOwner alphaData;
+
+ ///
+ /// Encodes the alpha channel data.
+ /// Data is either compressed as lossless webp image or uncompressed.
+ ///
+ /// The pixel format.
+ /// The to encode from.
+ /// The global configuration.
+ /// The memory manager.
+ /// Indicates, if the data should be compressed with the lossless webp compression.
+ /// The size in bytes of the alpha data.
+ /// The encoded alpha data.
+ public IMemoryOwner EncodeAlpha(Image image, Configuration configuration, MemoryAllocator memoryAllocator, bool compress, out int size)
+ where TPixel : unmanaged, IPixel
+ {
+ int width = image.Width;
+ int height = image.Height;
+ this.alphaData = ExtractAlphaChannel(image, configuration, memoryAllocator);
+
+ if (compress)
+ {
+ WebpEncodingMethod effort = WebpEncodingMethod.Default;
+ int quality = 8 * (int)effort;
+ using var lossLessEncoder = new Vp8LEncoder(
+ memoryAllocator,
+ configuration,
+ width,
+ height,
+ quality,
+ effort,
+ WebpTransparentColorMode.Preserve,
+ false,
+ 0);
+
+ // The transparency information will be stored in the green channel of the ARGB quadruplet.
+ // The green channel is allowed extra transformation steps in the specification -- unlike the other channels,
+ // that can improve compression.
+ using Image alphaAsImage = DispatchAlphaToGreen(image, this.alphaData.GetSpan());
+
+ size = lossLessEncoder.EncodeAlphaImageData(alphaAsImage, this.alphaData);
+
+ return this.alphaData;
+ }
+
+ size = width * height;
+ return this.alphaData;
+ }
+
+ ///
+ /// Store the transparency in the green channel.
+ ///
+ /// The pixel format.
+ /// The to encode from.
+ /// A byte sequence of length width * height, containing all the 8-bit transparency values in scan order.
+ /// The transparency image.
+ private static Image DispatchAlphaToGreen(Image image, Span alphaData)
+ where TPixel : unmanaged, IPixel
+ {
+ int width = image.Width;
+ int height = image.Height;
+ var alphaAsImage = new Image(width, height);
+
+ for (int y = 0; y < height; y++)
+ {
+ Memory rowBuffer = alphaAsImage.DangerousGetPixelRowMemory(y);
+ Span pixelRow = rowBuffer.Span;
+ Span alphaRow = alphaData.Slice(y * width, width);
+ for (int x = 0; x < width; x++)
+ {
+ // Leave A/R/B channels zero'd.
+ pixelRow[x] = new Rgba32(0, alphaRow[x], 0, 0);
+ }
+ }
+
+ return alphaAsImage;
+ }
+
+ ///
+ /// Extract the alpha data of the image.
+ ///
+ /// The pixel format.
+ /// The to encode from.
+ /// The global configuration.
+ /// The memory manager.
+ /// A byte sequence of length width * height, containing all the 8-bit transparency values in scan order.
+ private static IMemoryOwner ExtractAlphaChannel(Image image, Configuration configuration, MemoryAllocator memoryAllocator)
+ where TPixel : unmanaged, IPixel
+ {
+ Buffer2D imageBuffer = image.Frames.RootFrame.PixelBuffer;
+ int height = image.Height;
+ int width = image.Width;
+ IMemoryOwner alphaDataBuffer = memoryAllocator.Allocate(width * height);
+ Span alphaData = alphaDataBuffer.GetSpan();
+
+ using IMemoryOwner rowBuffer = memoryAllocator.Allocate(width);
+ Span rgbaRow = rowBuffer.GetSpan();
+
+ for (int y = 0; y < height; y++)
+ {
+ Span rowSpan = imageBuffer.DangerousGetRowSpan(y);
+ PixelOperations.Instance.ToRgba32(configuration, rowSpan, rgbaRow);
+ int offset = y * width;
+ for (int x = 0; x < width; x++)
+ {
+ alphaData[offset + x] = rgbaRow[x].A;
+ }
+ }
+
+ return alphaDataBuffer;
+ }
+
+ ///
+ public void Dispose() => this.alphaData?.Dispose();
+ }
+}
diff --git a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs
index ac039be797..fc1accfdee 100644
--- a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs
+++ b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs
@@ -47,6 +47,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
/// The stream to write to.
public void WriteToStream(Stream stream) => stream.Write(this.Buffer.AsSpan(0, this.NumBytes()));
+ ///
+ /// Writes the encoded bytes of the image to the given buffer. Call Finish() before this.
+ ///
+ /// The destination buffer.
+ public void WriteToBuffer(Span dest) => this.Buffer.AsSpan(0, this.NumBytes()).CopyTo(dest);
+
///
/// Resizes the buffer to write to.
///
@@ -94,7 +100,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
/// Calculates the chunk size of EXIF or XMP metadata.
///
/// The metadata profile bytes.
- /// The exif chunk size in bytes.
+ /// The metadata chunk size in bytes.
protected uint MetadataChunkSize(byte[] metadataBytes)
{
uint metaSize = (uint)metadataBytes.Length;
@@ -103,6 +109,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
return metaChunkSize;
}
+ ///
+ /// Calculates the chunk size of an alpha chunk.
+ ///
+ /// The alpha chunk bytes.
+ /// The alpha data chunk size in bytes.
+ protected uint AlphaChunkSize(Span alphaBytes)
+ {
+ uint alphaSize = (uint)alphaBytes.Length + 1;
+ uint alphaChunkSize = WebpConstants.ChunkHeaderSize + alphaSize + (alphaSize & 1);
+
+ return alphaChunkSize;
+ }
+
///
/// Writes a metadata profile (EXIF or XMP) to the stream.
///
@@ -128,6 +147,37 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
}
}
+ ///
+ /// Writes the alpha chunk to the stream.
+ ///
+ /// The stream to write to.
+ /// The alpha channel data bytes.
+ /// Indicates, if the alpha channel data is compressed.
+ protected void WriteAlphaChunk(Stream stream, Span dataBytes, bool alphaDataIsCompressed)
+ {
+ uint size = (uint)dataBytes.Length + 1;
+ Span buf = this.scratchBuffer.AsSpan(0, 4);
+ BinaryPrimitives.WriteUInt32BigEndian(buf, (uint)WebpChunkType.Alpha);
+ stream.Write(buf);
+ BinaryPrimitives.WriteUInt32LittleEndian(buf, size);
+ stream.Write(buf);
+
+ byte flags = 0;
+ if (alphaDataIsCompressed)
+ {
+ flags |= 1;
+ }
+
+ stream.WriteByte(flags);
+ stream.Write(dataBytes);
+
+ // Add padding byte if needed.
+ if ((size & 1) == 1)
+ {
+ stream.WriteByte(0);
+ }
+ }
+
///
/// Writes a VP8X header to the stream.
///
diff --git a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs
index 4e91bedb0b..fa6e09d875 100644
--- a/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs
+++ b/src/ImageSharp/Formats/Webp/BitWriter/Vp8BitWriter.cs
@@ -409,7 +409,17 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
/// The width of the image.
/// The height of the image.
/// Flag indicating, if a alpha channel is present.
- public void WriteEncodedImageToStream(Stream stream, ExifProfile exifProfile, XmpProfile xmpProfile, uint width, uint height, bool hasAlpha)
+ /// The alpha channel data.
+ /// Indicates, if the alpha data is compressed.
+ public void WriteEncodedImageToStream(
+ Stream stream,
+ ExifProfile exifProfile,
+ XmpProfile xmpProfile,
+ uint width,
+ uint height,
+ bool hasAlpha,
+ Span alphaData,
+ bool alphaDataIsCompressed)
{
bool isVp8X = false;
byte[] exifBytes = null;
@@ -418,7 +428,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
if (exifProfile != null)
{
isVp8X = true;
- riffSize += ExtendedFileChunkSize;
exifBytes = exifProfile.ToByteArray();
riffSize += this.MetadataChunkSize(exifBytes);
}
@@ -426,11 +435,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
if (xmpProfile != null)
{
isVp8X = true;
- riffSize += ExtendedFileChunkSize;
xmpBytes = xmpProfile.Data;
riffSize += this.MetadataChunkSize(xmpBytes);
}
+ if (hasAlpha)
+ {
+ isVp8X = true;
+ riffSize += this.AlphaChunkSize(alphaData);
+ }
+
+ if (isVp8X)
+ {
+ riffSize += ExtendedFileChunkSize;
+ }
+
this.Finish();
uint numBytes = (uint)this.NumBytes();
int mbSize = this.enc.Mbw * this.enc.Mbh;
@@ -451,7 +470,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
riffSize += WebpConstants.TagSize + WebpConstants.ChunkHeaderSize + vp8Size;
// Emit headers and partition #0
- this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile, xmpProfile, hasAlpha);
+ this.WriteWebpHeaders(stream, size0, vp8Size, riffSize, isVp8X, width, height, exifProfile, xmpProfile, hasAlpha, alphaData, alphaDataIsCompressed);
bitWriterPartZero.WriteToStream(stream);
// Write the encoded image to the stream.
@@ -639,7 +658,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
while (it.Next());
}
- private void WriteWebpHeaders(Stream stream, uint size0, uint vp8Size, uint riffSize, bool isVp8X, uint width, uint height, ExifProfile exifProfile, XmpProfile xmpProfile, bool hasAlpha)
+ private void WriteWebpHeaders(
+ Stream stream,
+ uint size0,
+ uint vp8Size,
+ uint riffSize,
+ bool isVp8X,
+ uint width,
+ uint height,
+ ExifProfile exifProfile,
+ XmpProfile xmpProfile,
+ bool hasAlpha,
+ Span alphaData,
+ bool alphaDataIsCompressed)
{
this.WriteRiffHeader(stream, riffSize);
@@ -647,6 +678,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitWriter
if (isVp8X)
{
this.WriteVp8XHeader(stream, exifProfile, xmpProfile, width, height, hasAlpha);
+ if (hasAlpha)
+ {
+ this.WriteAlphaChunk(stream, alphaData, alphaDataIsCompressed);
+ }
}
this.WriteVp8Header(stream, vp8Size);
diff --git a/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs b/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs
index d119d3031f..57ec32753d 100644
--- a/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs
+++ b/src/ImageSharp/Formats/Webp/IWebpEncoderOptions.cs
@@ -31,6 +31,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
///
/// Gets a value indicating whether the alpha plane should be compressed with Webp lossless format.
+ /// Defaults to true.
///
bool UseAlphaCompression { get; }
diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs
index e9dce913a3..30d65562ae 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs
@@ -228,7 +228,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public Vp8LHashChain HashChain { get; }
///
- /// Encodes the image to the specified stream from the .
+ /// Encodes the image as lossless webp to the specified stream.
///
/// The pixel format.
/// The to encode from.
@@ -236,10 +236,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public void Encode(Image image, Stream stream)
where TPixel : unmanaged, IPixel
{
- image.Metadata.SyncProfiles();
int width = image.Width;
int height = image.Height;
+ ImageMetadata metadata = image.Metadata;
+ metadata.SyncProfiles();
+
// Convert image pixels to bgra array.
bool hasAlpha = this.ConvertPixelsToBgra(image, width, height);
@@ -253,11 +255,42 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.EncodeStream(image);
// Write bytes from the bitwriter buffer to the stream.
- ImageMetadata metadata = image.Metadata;
- metadata.SyncProfiles();
this.bitWriter.WriteEncodedImageToStream(stream, metadata.ExifProfile, metadata.XmpProfile, (uint)width, (uint)height, hasAlpha);
}
+ ///
+ /// Encodes the alpha image data using the webp lossless compression.
+ ///
+ /// The type of the pixel.
+ /// The to encode from.
+ /// The destination buffer to write the encoded alpha data to.
+ /// The size of the compressed data in bytes.
+ /// If the size of the data is the same as the pixel count, the compression would not yield smaller data, so the data is left uncompressed.
+ ///
+ public int EncodeAlphaImageData(Image image, IMemoryOwner alphaData)
+ where TPixel : unmanaged, IPixel
+ {
+ int width = image.Width;
+ int height = image.Height;
+ int pixelCount = width * height;
+
+ // Convert image pixels to bgra array.
+ this.ConvertPixelsToBgra(image, width, height);
+
+ // The image-stream will NOT contain any headers describing the image dimension, the dimension is already known.
+ this.EncodeStream(image);
+ this.bitWriter.Finish();
+ int size = this.bitWriter.NumBytes();
+ if (size >= pixelCount)
+ {
+ // Compressing would not yield smaller data -> leave the data uncompressed.
+ return pixelCount;
+ }
+
+ this.bitWriter.WriteToBuffer(alphaData.GetSpan());
+ return size;
+ }
+
///
/// Writes the image size to the bitwriter buffer.
///
diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs
index 0222320502..695359e5ea 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs
@@ -71,12 +71,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
///
private int uvAlpha;
- ///
- /// Scratch buffer to reduce allocations.
- ///
- private readonly int[] scratch = new int[16];
-
- private readonly byte[] averageBytesPerMb = { 50, 24, 16, 9, 7, 5, 3, 2 };
+ private readonly bool alphaCompression;
private const int NumMbSegments = 4;
@@ -105,6 +100,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// Number of entropy-analysis passes (in [1..10]).
/// The filter the strength of the deblocking filter, between 0 (no filtering) and 100 (maximum filtering).
/// The spatial noise shaping. 0=off, 100=maximum.
+ /// If true, the alpha channel will be compressed with the lossless compression.
public Vp8Encoder(
MemoryAllocator memoryAllocator,
Configuration configuration,
@@ -114,7 +110,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
WebpEncodingMethod method,
int entropyPasses,
int filterStrength,
- int spatialNoiseShaping)
+ int spatialNoiseShaping,
+ bool alphaCompression)
{
this.memoryAllocator = memoryAllocator;
this.configuration = configuration;
@@ -125,6 +122,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.entropyPasses = Numerics.Clamp(entropyPasses, 1, 10);
this.filterStrength = Numerics.Clamp(filterStrength, 0, 100);
this.spatialNoiseShaping = Numerics.Clamp(spatialNoiseShaping, 0, 100);
+ this.alphaCompression = alphaCompression;
this.rdOptLevel = method is WebpEncodingMethod.BestQuality ? Vp8RdLevel.RdOptTrellisAll
: method >= WebpEncodingMethod.Level5 ? Vp8RdLevel.RdOptTrellis
: method >= WebpEncodingMethod.Level3 ? Vp8RdLevel.RdOptBasic
@@ -174,6 +172,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.ResetBoundaryPredictions();
}
+ // This uses C#'s optimization to refer to the static data segment of the assembly, no allocation occurs.
+ private static ReadOnlySpan AverageBytesPerMb => new byte[] { 50, 24, 16, 9, 7, 5, 3, 2 };
+
public int BaseQuant { get; set; }
///
@@ -297,10 +298,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
int width = image.Width;
int height = image.Height;
+ int pixelCount = width * height;
Span y = this.Y.GetSpan();
Span u = this.U.GetSpan();
Span v = this.V.GetSpan();
- YuvConversion.ConvertRgbToYuv(image, this.configuration, this.memoryAllocator, y, u, v);
+ bool hasAlpha = YuvConversion.ConvertRgbToYuv(image, this.configuration, this.memoryAllocator, y, u, v);
int yStride = width;
int uvStride = (yStride + 1) >> 1;
@@ -318,12 +320,26 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.SetLoopParams(this.quality);
// Initialize the bitwriter.
- int averageBytesPerMacroBlock = this.averageBytesPerMb[this.BaseQuant >> 4];
+ int averageBytesPerMacroBlock = AverageBytesPerMb[this.BaseQuant >> 4];
int expectedSize = this.Mbw * this.Mbh * averageBytesPerMacroBlock;
this.bitWriter = new Vp8BitWriter(expectedSize, this);
- // TODO: EncodeAlpha();
- bool hasAlpha = false;
+ // Extract and encode alpha channel data, if present. Only the first alphaDataSize bytes of the returned buffer are valid.
+ int alphaDataSize = 0;
+ bool alphaCompressionSucceeded = false;
+ using var alphaEncoder = new AlphaEncoder();
+ Span alphaData = Span.Empty;
+ if (hasAlpha)
+ {
+ // TODO: This can potentially run in a separate task.
+ IMemoryOwner encodedAlphaData = alphaEncoder.EncodeAlpha(image, this.configuration, this.memoryAllocator, this.alphaCompression, out alphaDataSize);
+ alphaData = encodedAlphaData.GetSpan().Slice(0, alphaDataSize);
+ if (alphaDataSize < pixelCount)
+ {
+ // Only use the compressed data if it is actually smaller than the uncompressed data.
+ alphaCompressionSucceeded = true;
+ }
+ }
// Stats-collection loop.
this.StatLoop(width, height, yStride, uvStride);
@@ -358,7 +374,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// Write bytes from the bitwriter buffer to the stream.
ImageMetadata metadata = image.Metadata;
metadata.SyncProfiles();
- this.bitWriter.WriteEncodedImageToStream(stream, metadata.ExifProfile, metadata.XmpProfile, (uint)width, (uint)height, hasAlpha);
+ this.bitWriter.WriteEncodedImageToStream(
+ stream,
+ metadata.ExifProfile,
+ metadata.XmpProfile,
+ (uint)width,
+ (uint)height,
+ hasAlpha,
+ alphaData,
+ this.alphaCompression && alphaCompressionSucceeded);
}
///
diff --git a/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs b/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs
index 7a731f4284..878bebd105 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs
@@ -318,7 +318,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// Span to store the luma component of the image.
/// Span to store the u component of the image.
/// Span to store the v component of the image.
- public static void ConvertRgbToYuv(Image image, Configuration configuration, MemoryAllocator memoryAllocator, Span y, Span u, Span v)
+ /// true, if the image contains alpha data.
+ public static bool ConvertRgbToYuv(Image image, Configuration configuration, MemoryAllocator memoryAllocator, Span y, Span u, Span v)
where TPixel : unmanaged, IPixel
{
Buffer2D imageBuffer = image.Frames.RootFrame.PixelBuffer;
@@ -335,6 +336,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Span bgraRow1 = bgraRow1Buffer.GetSpan();
int uvRowIndex = 0;
int rowIndex;
+ bool hasAlpha = false;
for (rowIndex = 0; rowIndex < height - 1; rowIndex += 2)
{
Span rowSpan = imageBuffer.DangerousGetRowSpan(rowIndex);
@@ -343,6 +345,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
PixelOperations.Instance.ToBgra32(configuration, nextRowSpan, bgraRow1);
- bool rowsHaveAlpha = WebpCommonUtils.CheckNonOpaque(bgraRow0) && WebpCommonUtils.CheckNonOpaque(bgraRow1);
+ bool rowsHaveAlpha = WebpCommonUtils.CheckNonOpaque(bgraRow0) || WebpCommonUtils.CheckNonOpaque(bgraRow1);
+ if (rowsHaveAlpha)
+ {
+ hasAlpha = true;
+ }
// Downsample U/V planes, two rows at a time.
if (!rowsHaveAlpha)
@@ -375,10 +381,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
else
{
AccumulateRgba(bgraRow0, bgraRow0, tmpRgbSpan, width);
+ hasAlpha = true;
}
ConvertRgbaToUv(tmpRgbSpan, u.Slice(uvRowIndex * uvWidth), v.Slice(uvRowIndex * uvWidth), uvWidth);
}
+
+ return hasAlpha;
}
///
diff --git a/src/ImageSharp/Formats/Webp/WebpEncoder.cs b/src/ImageSharp/Formats/Webp/WebpEncoder.cs
index bdcbb194b1..d0b60d18cd 100644
--- a/src/ImageSharp/Formats/Webp/WebpEncoder.cs
+++ b/src/ImageSharp/Formats/Webp/WebpEncoder.cs
@@ -24,7 +24,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
public WebpEncodingMethod Method { get; set; } = WebpEncodingMethod.Default;
///
- public bool UseAlphaCompression { get; set; }
+ public bool UseAlphaCompression { get; set; } = true;
///
public int EntropyPasses { get; set; } = 1;
diff --git a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs
index 195fa62bdc..0fbff81fe4 100644
--- a/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs
+++ b/src/ImageSharp/Formats/Webp/WebpEncoderCore.cs
@@ -22,8 +22,8 @@ namespace SixLabors.ImageSharp.Formats.Webp
private readonly MemoryAllocator memoryAllocator;
///
- /// TODO: not used at the moment.
/// Indicating whether the alpha plane should be compressed with Webp lossless format.
+ /// Defaults to true.
///
private readonly bool alphaCompression;
@@ -100,7 +100,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
}
///
- /// Encodes the image to the specified stream from the .
+ /// Encodes the image as webp to the specified stream.
///
/// The pixel format.
/// The to encode from.
@@ -149,7 +149,8 @@ namespace SixLabors.ImageSharp.Formats.Webp
this.method,
this.entropyPasses,
this.filterStrength,
- this.spatialNoiseShaping);
+ this.spatialNoiseShaping,
+ this.alphaCompression);
enc.Encode(image, stream);
}
}
diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpegParseStreamOnly.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpegParseStreamOnly.cs
index 9db666c374..988c056608 100644
--- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpegParseStreamOnly.cs
+++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpegParseStreamOnly.cs
@@ -39,10 +39,9 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
using var memoryStream = new MemoryStream(this.jpegBytes);
using var bufferedStream = new BufferedReadStream(Configuration.Default, memoryStream);
- var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder { IgnoreMetadata = true });
+ using var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder { IgnoreMetadata = true });
var scanDecoder = new HuffmanScanDecoder(bufferedStream, new NoopSpectralConverter(), cancellationToken: default);
decoder.ParseStream(bufferedStream, scanDecoder, cancellationToken: default);
- decoder.Dispose();
}
// We want to test only stream parsing and scan decoding, we don't need to convert spectral data to actual pixels
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
index ae7e81254b..9576cbd3c8 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
@@ -220,7 +220,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
// Reference implementation quantizes given block via division
Block8x8 expected = default;
- ReferenceImplementations.Quantize(ref source, ref expected, ref quant, ZigZag.ZigZagOrder);
+ ReferenceImplementations.Quantize(ref source, ref expected, ref quant, ZigZag.TransposingOrder);
// Actual current implementation quantizes given block via multiplication
// With quantization table reciprocal
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
index 36570ce55a..9c467a1cc9 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
@@ -135,10 +135,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
srcBlock.MultiplyInPlace(ref dequantMatrix);
+ // testee
// IDCT implementation tranforms blocks after transposition
srcBlock.TransposeInplace();
-
- // IDCT calculation
FastFloatingPointDCT.TransformIDCT(ref srcBlock);
float[] actualDest = srcBlock.ToArray();
@@ -180,7 +179,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true);
// testee
+ // Second transpose call is done by Quantize step
+ // Do this manually here just to be compliant with the reference implementation
FastFloatingPointDCT.TransformFDCT(ref block);
+ block.TransposeInplace();
// Part of the IDCT calculations is fused into the quantization step
// We must multiply input block with adjusted no-quantization matrix
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/SpectralJpegTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/SpectralJpegTests.cs
index 35113f14ff..3833b419c4 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/SpectralJpegTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/SpectralJpegTests.cs
@@ -50,7 +50,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
// Calculating data from ImageSharp
byte[] sourceBytes = TestFile.Create(provider.SourceFileOrDescription).Bytes;
- var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder());
+ using var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder());
using var ms = new MemoryStream(sourceBytes);
using var bufferedStream = new BufferedReadStream(Configuration.Default, ms);
@@ -79,7 +79,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
// Calculating data from ImageSharp
byte[] sourceBytes = TestFile.Create(provider.SourceFileOrDescription).Bytes;
- var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder());
+ using var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder());
using var ms = new MemoryStream(sourceBytes);
using var bufferedStream = new BufferedReadStream(Configuration.Default, ms);
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/SpectralToPixelConversionTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/SpectralToPixelConversionTests.cs
index 0071c623c6..27240831c3 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/SpectralToPixelConversionTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/SpectralToPixelConversionTests.cs
@@ -45,7 +45,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
// Decoding
using var converter = new SpectralConverter(Configuration.Default);
- var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder());
+ using var decoder = new JpegDecoderCore(Configuration.Default, new JpegDecoder());
var scanDecoder = new HuffmanScanDecoder(bufferedStream, converter, cancellationToken: default);
decoder.ParseStream(bufferedStream, scanDecoder, cancellationToken: default);
diff --git a/tests/ImageSharp.Tests/Formats/WebP/WebpEncoderTests.cs b/tests/ImageSharp.Tests/Formats/WebP/WebpEncoderTests.cs
index 7043549b22..7c74429edc 100644
--- a/tests/ImageSharp.Tests/Formats/WebP/WebpEncoderTests.cs
+++ b/tests/ImageSharp.Tests/Formats/WebP/WebpEncoderTests.cs
@@ -167,18 +167,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
image.VerifyEncoder(provider, "webp", testOutputDetails, encoder);
}
- [Theory]
- [WithFile(TestPatternOpaque, PixelTypes.Rgba32)]
- [WithFile(TestPatternOpaqueSmall, PixelTypes.Rgba32)]
- public void Encode_Lossless_WorksWithTestPattern(TestImageProvider provider)
- where TPixel : unmanaged, IPixel
- {
- using Image image = provider.GetImage();
-
- var encoder = new WebpEncoder() { FileFormat = WebpFileFormatType.Lossless };
- image.VerifyEncoder(provider, "webp", string.Empty, encoder);
- }
-
[Fact]
public void Encode_Lossless_OneByOnePixel_Works()
{
@@ -279,6 +267,34 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
image.VerifyEncoder(provider, "webp", testOutputDetails, encoder, customComparer: GetComparer(quality));
}
+ [Theory]
+ [WithFile(TestImages.Png.Transparency, PixelTypes.Rgba32, false)]
+ [WithFile(TestImages.Png.Transparency, PixelTypes.Rgba32, true)]
+ public void Encode_Lossy_WithAlpha_Works(TestImageProvider provider, bool compressed)
+ where TPixel : unmanaged, IPixel
+ {
+ var encoder = new WebpEncoder()
+ {
+ FileFormat = WebpFileFormatType.Lossy,
+ UseAlphaCompression = compressed
+ };
+
+ using Image image = provider.GetImage();
+ image.VerifyEncoder(provider, "webp", $"with_alpha_compressed_{compressed}", encoder, ImageComparer.Tolerant(0.04f));
+ }
+
+ [Theory]
+ [WithFile(TestPatternOpaque, PixelTypes.Rgba32)]
+ [WithFile(TestPatternOpaqueSmall, PixelTypes.Rgba32)]
+ public void Encode_Lossless_WorksWithTestPattern(TestImageProvider provider)
+ where TPixel : unmanaged, IPixel
+ {
+ using Image image = provider.GetImage();
+
+ var encoder = new WebpEncoder() { FileFormat = WebpFileFormatType.Lossless };
+ image.VerifyEncoder(provider, "webp", string.Empty, encoder);
+ }
+
[Theory]
[WithFile(TestPatternOpaque, PixelTypes.Rgba32)]
[WithFile(TestPatternOpaqueSmall, PixelTypes.Rgba32)]
diff --git a/tests/ImageSharp.Tests/Memory/Allocators/UniformUnmanagedMemoryPoolTests.cs b/tests/ImageSharp.Tests/Memory/Allocators/UniformUnmanagedMemoryPoolTests.cs
index 4ab2c93a52..7d98eff611 100644
--- a/tests/ImageSharp.Tests/Memory/Allocators/UniformUnmanagedMemoryPoolTests.cs
+++ b/tests/ImageSharp.Tests/Memory/Allocators/UniformUnmanagedMemoryPoolTests.cs
@@ -245,7 +245,10 @@ namespace SixLabors.ImageSharp.Tests.Memory.Allocators
cleanup.Register(b1);
}
- [Theory]
+ public static readonly bool IsNotMacOS = !TestEnvironment.IsOSX;
+
+ // TODO: Investigate MacOS failures
+ [ConditionalTheory(nameof(IsNotMacOS))]
[InlineData(false)]
[InlineData(true)]
public void RentReturnRelease_SubsequentRentReturnsDifferentHandles(bool multiple)
diff --git a/tests/ImageSharp.Tests/TestImages.cs b/tests/ImageSharp.Tests/TestImages.cs
index 172cfd7161..5ff71ba396 100644
--- a/tests/ImageSharp.Tests/TestImages.cs
+++ b/tests/ImageSharp.Tests/TestImages.cs
@@ -15,6 +15,7 @@ namespace SixLabors.ImageSharp.Tests
{
public static class Png
{
+ public const string Transparency = "Png/transparency.png";
public const string P1 = "Png/pl.png";
public const string Pd = "Png/pd.png";
public const string Blur = "Png/blur.png";
diff --git a/tests/Images/Input/Png/transparency.png b/tests/Images/Input/Png/transparency.png
new file mode 100644
index 0000000000..26de0f2d1a
--- /dev/null
+++ b/tests/Images/Input/Png/transparency.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:843bea4db378f52935e2f19f60d289df8ebe20ddde3977c63225f1d58a10bd62
+size 48119