Browse Source

Merge branch 'master' into bp/tiffgray

pull/1652/head
Brian Popow 5 years ago
committed by GitHub
parent
commit
0726de48ee
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      shared-infrastructure
  2. 45
      src/ImageSharp/Common/Helpers/Numerics.cs
  3. 25
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  4. 228
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
  5. 10
      src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs
  6. 392
      src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs
  7. 2
      src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs
  8. 148
      src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs
  9. 195
      src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs
  10. 121
      src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs
  11. 122
      src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs
  12. 90
      src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs
  13. 463
      src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
  14. 642
      src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
  15. 3
      src/ImageSharp/Formats/Jpeg/JpegThrowHelper.cs
  16. 4
      src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs
  17. 38
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Scale16X16To8X8.cs
  18. 91
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs
  19. 4
      tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs
  20. 264
      tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
  21. 214
      tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs
  22. 3
      tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeKernelMapTests.cs

2
shared-infrastructure

@ -1 +1 @@
Subproject commit 48e73f455f15eafefbe3175efc7433e5f277e506
Subproject commit 1f7ee702812f3a1713ab7f749c0faae0ef139ed7

45
src/ImageSharp/Common/Helpers/Numerics.cs

@ -23,6 +23,28 @@ namespace SixLabors.ImageSharp
private const int ShuffleAlphaControl = 0b_11_11_11_11;
#endif
#if !SUPPORTS_BITOPERATIONS
/// <summary>
/// Gets a lookup table that maps every byte value (0-255) to the minimum number of bits needed to hold it.
/// </summary>
/// <remarks>
/// Entry 0 is 0, entry 1 is 1, entries 2-3 are 2, entries 4-7 are 3, and so on up to entries 128-255 which are 8.
/// Only compiled when <c>SUPPORTS_BITOPERATIONS</c> is not defined.
/// </remarks>
private static ReadOnlySpan<byte> BitCountLut => new byte[]
{
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8,
};
#endif
/// <summary>
/// Determine the Greatest CommonDivisor (GCD) of two numbers.
/// </summary>
@ -756,7 +778,7 @@ namespace SixLabors.ImageSharp
/// widening them to 32-bit integers and performing four additions.
/// </summary>
/// <remarks>
/// <code>byte(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)</code>
/// <c>byte(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)</c>
/// is widened and added onto <paramref name="accumulator"/> as such:
/// <code>
/// accumulator += i32(1, 2, 3, 4);
@ -825,5 +847,26 @@ namespace SixLabors.ImageSharp
return Sse2.ConvertToInt32(vsum);
}
#endif
/// <summary>
/// Computes the minimum number of bits required to store the given value.
/// </summary>
/// <remarks>
/// The lookup-table path (used when <c>SUPPORTS_BITOPERATIONS</c> is not defined) only covers
/// values that fit in 16 bits; larger values index outside the table.
/// </remarks>
/// <param name="number">The unsigned integer to measure.</param>
/// <returns>The minimum number of bits needed to store <paramref name="number"/>; 0 for a value of 0.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int MinimumBitsToStore16(uint number)
{
#if SUPPORTS_BITOPERATIONS
    // Total width of a uint minus the unused leading zero bits.
    return (sizeof(uint) * 8) - BitOperations.LeadingZeroCount(number);
#else
    // Values below 256 are looked up directly; otherwise the high byte is looked up
    // and the 8 bits of the low byte are added on top.
    return number < 0x100
        ? BitCountLut[(int)number]
        : 8 + BitCountLut[(int)number >> 8];
#endif
}
}
}

25
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -532,6 +532,7 @@ namespace SixLabors.ImageSharp
/// <summary>
/// Performs a multiplication and an addition of the <see cref="Vector256{T}"/>.
/// </summary>
/// <remarks>ret = (vm0 * vm1) + va</remarks>
/// <param name="va">The vector to add to the intermediate result.</param>
/// <param name="vm0">The first vector to multiply.</param>
/// <param name="vm1">The second vector to multiply.</param>
@ -552,6 +553,30 @@ namespace SixLabors.ImageSharp
}
}
/// <summary>
/// Performs a multiplication and a subtraction of the <see cref="Vector256{T}"/>.
/// </summary>
/// <remarks>ret = (vm0 * vm1) - vs</remarks>
/// <param name="vs">The vector to subtract from the intermediate result.</param>
/// <param name="vm0">The first vector to multiply.</param>
/// <param name="vm1">The second vector to multiply.</param>
/// <returns>The <see cref="Vector256{T}"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static Vector256<float> MultiplySubstract(
    in Vector256<float> vs,
    in Vector256<float> vm0,
    in Vector256<float> vm1)
{
    if (!Fma.IsSupported)
    {
        // No fused instruction available: multiply first, then subtract.
        Vector256<float> product = Avx.Multiply(vm0, vm1);
        return Avx.Subtract(product, vs);
    }

    // Fused multiply-subtract in a single instruction.
    return Fma.MultiplySubtract(vm1, vm0, vs);
}
/// <summary>
/// <see cref="ByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>

228
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <summary>
/// Represents a Jpeg block with <see cref="float"/> coefficients.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
[StructLayout(LayoutKind.Explicit)]
internal partial struct Block8x8F : IEquatable<Block8x8F>
{
/// <summary>
@ -27,29 +27,69 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public const int Size = 64;
#pragma warning disable SA1600 // ElementsMustBeDocumented
[FieldOffset(0)]
public Vector4 V0L;
[FieldOffset(16)]
public Vector4 V0R;
[FieldOffset(32)]
public Vector4 V1L;
[FieldOffset(48)]
public Vector4 V1R;
[FieldOffset(64)]
public Vector4 V2L;
[FieldOffset(80)]
public Vector4 V2R;
[FieldOffset(96)]
public Vector4 V3L;
[FieldOffset(112)]
public Vector4 V3R;
[FieldOffset(128)]
public Vector4 V4L;
[FieldOffset(144)]
public Vector4 V4R;
[FieldOffset(160)]
public Vector4 V5L;
[FieldOffset(176)]
public Vector4 V5R;
[FieldOffset(192)]
public Vector4 V6L;
[FieldOffset(208)]
public Vector4 V6R;
[FieldOffset(224)]
public Vector4 V7L;
[FieldOffset(240)]
public Vector4 V7R;
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// A number of rows of 8 scalar coefficients each in <see cref="Block8x8F"/>
/// </summary>
public const int RowCount = 8;
[FieldOffset(0)]
public Vector256<float> V0;
[FieldOffset(32)]
public Vector256<float> V1;
[FieldOffset(64)]
public Vector256<float> V2;
[FieldOffset(96)]
public Vector256<float> V3;
[FieldOffset(128)]
public Vector256<float> V4;
[FieldOffset(160)]
public Vector256<float> V5;
[FieldOffset(192)]
public Vector256<float> V6;
[FieldOffset(224)]
public Vector256<float> V7;
#endif
#pragma warning restore SA1600 // ElementsMustBeDocumented
/// <summary>
@ -278,14 +318,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
if (Avx.IsSupported)
{
var valueVec = Vector256.Create(value);
Unsafe.As<Vector4, Vector256<float>>(ref this.V0L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V0L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V1L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V1L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V2L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V2L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V3L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V3L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V4L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V4L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V5L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V5L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V6L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V6L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V7L) = Avx.Multiply(Unsafe.As<Vector4, Vector256<float>>(ref this.V7L), valueVec);
this.V0 = Avx.Multiply(this.V0, valueVec);
this.V1 = Avx.Multiply(this.V1, valueVec);
this.V2 = Avx.Multiply(this.V2, valueVec);
this.V3 = Avx.Multiply(this.V3, valueVec);
this.V4 = Avx.Multiply(this.V4, valueVec);
this.V5 = Avx.Multiply(this.V5, valueVec);
this.V6 = Avx.Multiply(this.V6, valueVec);
this.V7 = Avx.Multiply(this.V7, valueVec);
}
else
#endif
@ -319,45 +359,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
Unsafe.As<Vector4, Vector256<float>>(ref this.V0L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V0L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V0L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V1L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V1L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V1L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V2L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V2L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V2L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V3L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V3L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V3L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V4L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V4L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V4L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V5L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V5L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V5L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V6L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V6L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V6L));
Unsafe.As<Vector4, Vector256<float>>(ref this.V7L)
= Avx.Multiply(
Unsafe.As<Vector4, Vector256<float>>(ref this.V7L),
Unsafe.As<Vector4, Vector256<float>>(ref other.V7L));
this.V0 = Avx.Multiply(this.V0, other.V0);
this.V1 = Avx.Multiply(this.V1, other.V1);
this.V2 = Avx.Multiply(this.V2, other.V2);
this.V3 = Avx.Multiply(this.V3, other.V3);
this.V4 = Avx.Multiply(this.V4, other.V4);
this.V5 = Avx.Multiply(this.V5, other.V5);
this.V6 = Avx.Multiply(this.V6, other.V6);
this.V7 = Avx.Multiply(this.V7, other.V7);
}
else
#endif
@ -392,14 +401,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
if (Avx.IsSupported)
{
var valueVec = Vector256.Create(value);
Unsafe.As<Vector4, Vector256<float>>(ref this.V0L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V0L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V1L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V1L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V2L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V2L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V3L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V3L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V4L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V4L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V5L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V5L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V6L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V6L), valueVec);
Unsafe.As<Vector4, Vector256<float>>(ref this.V7L) = Avx.Add(Unsafe.As<Vector4, Vector256<float>>(ref this.V7L), valueVec);
this.V0 = Avx.Add(this.V0, valueVec);
this.V1 = Avx.Add(this.V1, valueVec);
this.V2 = Avx.Add(this.V2, valueVec);
this.V3 = Avx.Add(this.V3, valueVec);
this.V4 = Avx.Add(this.V4, valueVec);
this.V5 = Avx.Add(this.V5, valueVec);
this.V6 = Avx.Add(this.V6, valueVec);
this.V7 = Avx.Add(this.V7, valueVec);
}
else
#endif
@ -468,81 +477,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
DivideRoundAll(ref dest, ref qt);
}
/// <summary>
/// Downscales the 16x16 region described by 4 source blocks into a single 8x8 destination block.
/// </summary>
/// <remarks>
/// Dispatches to the AVX2 implementation when it is compiled in and supported by the CPU,
/// otherwise to the scalar implementation.
/// </remarks>
/// <param name="destination">The destination block.</param>
/// <param name="source">The source blocks.</param>
public static unsafe void Scale16X16To8X8(ref Block8x8F destination, ReadOnlySpan<Block8x8F> source)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported)
    {
        Scale16X16To8X8Vectorized(ref destination, source);
    }
    else
#endif
    {
        Scale16X16To8X8Scalar(ref destination, source);
    }
}
/// <summary>
/// AVX2 implementation of the 16x16 to 8x8 downscale.
/// The body is only compiled when <c>SUPPORTS_RUNTIME_INTRINSICS</c> is defined; otherwise the method is empty.
/// </summary>
/// <param name="destination">The destination block, written one 8-float row at a time.</param>
/// <param name="source">The source blocks; indices 0 to 3 are read.</param>
private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnlySpan<Block8x8F> source)
{
#if SUPPORTS_RUNTIME_INTRINSICS
Debug.Assert(Avx2.IsSupported, "AVX2 is required to execute this method");
// Bias added to each 4-sample sum, and the 1/4 factor that turns the biased sum into an average.
var f2 = Vector256.Create(2f);
var f025 = Vector256.Create(0.25f);
// Permutation mask read from a shared table.
// NOTE(review): presumably restores element order after the per-lane shuffles below - confirm against SimdUtils.HwIntrinsics.
Vector256<int> switchInnerDoubleWords = Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32));
// View the destination as rows of 8 floats; the reference is advanced by one row per inner iteration.
ref Vector256<float> destRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref destination);
// Each outer iteration consumes a pair of source blocks (indices 2i and 2i + 1).
for (int i = 0; i < 2; i++)
{
ref Vector256<float> in1 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 2 * i));
ref Vector256<float> in2 = ref Unsafe.As<Block8x8F, Vector256<float>>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), (2 * i) + 1));
// Two source rows from each block are combined into one destination row.
for (int j = 0; j < 8; j += 2)
{
Vector256<float> a = Unsafe.Add(ref in1, j);
Vector256<float> b = Unsafe.Add(ref in1, j + 1);
Vector256<float> c = Unsafe.Add(ref in2, j);
Vector256<float> d = Unsafe.Add(ref in2, j + 1);
// Within each 128-bit lane: calc1/calc3 pick the even-indexed elements and calc2/calc4 the odd-indexed ones,
// so adding them sums horizontally adjacent samples.
Vector256<float> calc1 = Avx.Shuffle(a, c, 0b10_00_10_00);
Vector256<float> calc2 = Avx.Shuffle(a, c, 0b11_01_11_01);
Vector256<float> calc3 = Avx.Shuffle(b, d, 0b10_00_10_00);
Vector256<float> calc4 = Avx.Shuffle(b, d, 0b11_01_11_01);
// Sum of each 2x2 neighbourhood, then (sum + 2) * 0.25.
Vector256<float> sum = Avx.Add(Avx.Add(calc1, calc2), Avx.Add(calc3, calc4));
Vector256<float> add = Avx.Add(sum, f2);
Vector256<float> res = Avx.Multiply(add, f025);
// Store the permuted row and move on to the next destination row.
destRef = Avx2.PermuteVar8x32(res, switchInnerDoubleWords);
destRef = ref Unsafe.Add(ref destRef, 1);
}
}
#endif
}
/// <summary>
/// Scalar implementation of the 16x16 to 8x8 downscale.
/// Every source block is reduced to a 4x4 quadrant of the destination by combining each
/// 2x2 group of samples as <c>(sum + 2) * 0.25</c>.
/// </summary>
/// <param name="destination">The destination block.</param>
/// <param name="source">The source blocks; indices 0 to 3 are read.</param>
private static unsafe void Scale16X16To8X8Scalar(ref Block8x8F destination, ReadOnlySpan<Block8x8F> source)
{
    for (int blockIndex = 0; blockIndex < 4; blockIndex++)
    {
        // Destination quadrant start: 0, 4, 32, 36 for blocks 0..3.
        int quadrantOffset = ((blockIndex & 2) << 4) | ((blockIndex & 1) << 2);
        Block8x8F block = source[blockIndex];

        for (int row = 0; row < 4; row++)
        {
            int destinationRowStart = (8 * row) + quadrantOffset;

            for (int column = 0; column < 4; column++)
            {
                // Top-left sample of the 2x2 group; +1 is its right neighbour, +8/+9 the row below.
                int topLeft = (16 * row) + (2 * column);
                float total = block[topLeft] + block[topLeft + 1] + block[topLeft + 8] + block[topLeft + 9];
                destination[destinationRowStart + column] = (total + 2) * .25F;
            }
        }
    }
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b)
{
@ -553,19 +487,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
var vadd = Vector256.Create(.5F);
var vone = Vector256.Create(1f);
ref Vector256<float> aBase = ref Unsafe.AsRef(Unsafe.As<Vector4, Vector256<float>>(ref a.V0L));
ref Vector256<float> bBase = ref Unsafe.AsRef(Unsafe.As<Vector4, Vector256<float>>(ref b.V0L));
ref Vector256<float> aEnd = ref Unsafe.Add(ref aBase, 8);
do
for (int i = 0; i < RowCount; i++)
{
Vector256<float> voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aBase), vone), vadd);
Unsafe.Add(ref aBase, 0) = Avx.Add(Avx.Divide(aBase, bBase), voff);
aBase = ref Unsafe.Add(ref aBase, 1);
bBase = ref Unsafe.Add(ref bBase, 1);
ref Vector256<float> aRow = ref Unsafe.Add(ref a.V0, i);
ref Vector256<float> bRow = ref Unsafe.Add(ref b.V0, i);
Vector256<float> voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aRow), vone), vadd);
aRow = Avx.Add(Avx.Divide(aRow, bRow), voff);
}
while (Unsafe.IsAddressLessThan(ref aBase, ref aEnd));
}
else
#endif
@ -805,26 +733,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
Vector256<float> t2 = Avx.UnpackLow(r2, r3);
Vector256<float> v = Avx.Shuffle(t0, t2, 0x4E);
Unsafe.As<Vector4, Vector256<float>>(ref d.V0L) = Avx.Blend(t0, v, 0xCC);
Unsafe.As<Vector4, Vector256<float>>(ref d.V1L) = Avx.Blend(t2, v, 0x33);
d.V0 = Avx.Blend(t0, v, 0xCC);
d.V1 = Avx.Blend(t2, v, 0x33);
Vector256<float> t4 = Avx.UnpackLow(r4, r5);
Vector256<float> t6 = Avx.UnpackLow(r6, r7);
v = Avx.Shuffle(t4, t6, 0x4E);
Unsafe.As<Vector4, Vector256<float>>(ref d.V4L) = Avx.Blend(t4, v, 0xCC);
Unsafe.As<Vector4, Vector256<float>>(ref d.V5L) = Avx.Blend(t6, v, 0x33);
d.V4 = Avx.Blend(t4, v, 0xCC);
d.V5 = Avx.Blend(t6, v, 0x33);
Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
v = Avx.Shuffle(t1, t3, 0x4E);
Unsafe.As<Vector4, Vector256<float>>(ref d.V2L) = Avx.Blend(t1, v, 0xCC);
Unsafe.As<Vector4, Vector256<float>>(ref d.V3L) = Avx.Blend(t3, v, 0x33);
d.V2 = Avx.Blend(t1, v, 0xCC);
d.V3 = Avx.Blend(t3, v, 0x33);
Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
Vector256<float> t7 = Avx.UnpackHigh(r6, r7);
v = Avx.Shuffle(t5, t7, 0x4E);
Unsafe.As<Vector4, Vector256<float>>(ref d.V6L) = Avx.Blend(t5, v, 0xCC);
Unsafe.As<Vector4, Vector256<float>>(ref d.V7L) = Avx.Blend(t7, v, 0x33);
d.V6 = Avx.Blend(t5, v, 0xCC);
d.V7 = Avx.Blend(t7, v, 0x33);
}
else
#endif

10
src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs

@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
@ -44,7 +44,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
}
}
this.Values = new uint[maxValue + 1];
this.Values = new int[maxValue + 1];
int code = 0;
int k = 0;
@ -54,7 +54,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
int bits = (i + 1) << 24;
for (int j = 0; j < spec.Count[i]; j++)
{
this.Values[spec.Values[k]] = (uint)(bits | code);
this.Values[spec.Values[k]] = bits | code;
code++;
k++;
}
@ -66,6 +66,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
/// <summary>
/// Gets the collection of huffman values.
/// </summary>
public uint[] Values { get; }
public int[] Values { get; }
}
}
}

392
src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs

@ -0,0 +1,392 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.IO;
using System.Runtime.CompilerServices;
using System.Threading;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
/// <summary>
/// Converts image pixels into quantized DCT blocks, Huffman-encodes them and writes
/// the resulting scan data to the target stream.
/// </summary>
internal class HuffmanScanEncoder
{
    /// <summary>
    /// Number of bytes cached before being written to target stream via Stream.Write(byte[], offset, count).
    /// </summary>
    /// <remarks>
    /// This is subject to change, 1024 seems to be the best value in terms of performance.
    /// <see cref="Emit(int, int)"/> expects it to be at least 8 (see comments in method body).
    /// </remarks>
    private const int EmitBufferSizeInBytes = 1024;

    /// <summary>
    /// A buffer for reducing the number of stream writes when emitting encoded scan data.
    /// </summary>
    private readonly byte[] emitBuffer = new byte[EmitBufferSizeInBytes];

    /// <summary>
    /// Number of filled bytes in <see cref="emitBuffer"/> buffer
    /// </summary>
    private int emitLen = 0;

    /// <summary>
    /// Emitted bits 'micro buffer' before being transferred to the <see cref="emitBuffer"/>.
    /// Pending bits are kept left-aligned (starting at the most significant bit).
    /// </summary>
    private int accumulatedBits;

    /// <summary>
    /// Number of pending bits stored in <see cref="accumulatedBits"/> that do not yet form a whole byte.
    /// </summary>
    private int bitCount;

    /// <summary>
    /// Scratch block passed to the forward DCT and quantization steps.
    /// </summary>
    private Block8x8F temporalBlock1;

    /// <summary>
    /// Scratch block that holds the quantized coefficients read back by <see cref="WriteBlock"/>.
    /// </summary>
    private Block8x8F temporalBlock2;

    /// <summary>
    /// The output stream the encoded scan data is written to.
    /// </summary>
    private readonly Stream target;

    /// <summary>
    /// Initializes a new instance of the <see cref="HuffmanScanEncoder"/> class.
    /// </summary>
    /// <param name="outputStream">The stream to write the encoded scan data to.</param>
    public HuffmanScanEncoder(Stream outputStream)
    {
        this.target = outputStream;
    }

    /// <summary>
    /// Encodes the image with no subsampling.
    /// </summary>
    /// <typeparam name="TPixel">The pixel format.</typeparam>
    /// <param name="pixels">The image providing access to the pixels of its root frame.</param>
    /// <param name="luminanceQuantTable">Luminance quantization table provided by the caller</param>
    /// <param name="chrominanceQuantTable">Chrominance quantization table provided by the caller</param>
    /// <param name="cancellationToken">The token to monitor for cancellation.</param>
    public void Encode444<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
        where TPixel : unmanaged, IPixel<TPixel>
    {
        var unzig = ZigZag.CreateUnzigTable();

        // ReSharper disable once InconsistentNaming
        int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;

        ImageFrame<TPixel> frame = pixels.Frames.RootFrame;
        Buffer2D<TPixel> pixelBuffer = frame.PixelBuffer;
        RowOctet<TPixel> currentRows = default;

        var pixelConverter = new YCbCrForwardConverter444<TPixel>(frame);

        for (int y = 0; y < pixels.Height; y += 8)
        {
            cancellationToken.ThrowIfCancellationRequested();
            currentRows.Update(pixelBuffer, y);

            for (int x = 0; x < pixels.Width; x += 8)
            {
                pixelConverter.Convert(x, y, ref currentRows);

                prevDCY = this.WriteBlock(
                    QuantIndex.Luminance,
                    prevDCY,
                    ref pixelConverter.Y,
                    ref luminanceQuantTable,
                    ref unzig);

                prevDCCb = this.WriteBlock(
                    QuantIndex.Chrominance,
                    prevDCCb,
                    ref pixelConverter.Cb,
                    ref chrominanceQuantTable,
                    ref unzig);

                prevDCCr = this.WriteBlock(
                    QuantIndex.Chrominance,
                    prevDCCr,
                    ref pixelConverter.Cr,
                    ref chrominanceQuantTable,
                    ref unzig);
            }
        }

        this.FlushInternalBuffer();
    }

    /// <summary>
    /// Encodes the image with subsampling. The Cb and Cr components are each subsampled
    /// at a factor of 2 both horizontally and vertically.
    /// </summary>
    /// <typeparam name="TPixel">The pixel format.</typeparam>
    /// <param name="pixels">The image providing access to the pixels of its root frame.</param>
    /// <param name="luminanceQuantTable">Luminance quantization table provided by the caller</param>
    /// <param name="chrominanceQuantTable">Chrominance quantization table provided by the caller</param>
    /// <param name="cancellationToken">The token to monitor for cancellation.</param>
    public void Encode420<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
        where TPixel : unmanaged, IPixel<TPixel>
    {
        var unzig = ZigZag.CreateUnzigTable();

        // ReSharper disable once InconsistentNaming
        int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;

        ImageFrame<TPixel> frame = pixels.Frames.RootFrame;
        Buffer2D<TPixel> pixelBuffer = frame.PixelBuffer;
        RowOctet<TPixel> currentRows = default;

        var pixelConverter = new YCbCrForwardConverter420<TPixel>(frame);

        for (int y = 0; y < pixels.Height; y += 16)
        {
            cancellationToken.ThrowIfCancellationRequested();

            for (int x = 0; x < pixels.Width; x += 16)
            {
                // Two passes of 8 rows each; every pass yields a left and a right luminance block.
                for (int i = 0; i < 2; i++)
                {
                    int yOff = i * 8;
                    currentRows.Update(pixelBuffer, y + yOff);
                    pixelConverter.Convert(x, y, ref currentRows, i);

                    prevDCY = this.WriteBlock(
                        QuantIndex.Luminance,
                        prevDCY,
                        ref pixelConverter.YLeft,
                        ref luminanceQuantTable,
                        ref unzig);

                    prevDCY = this.WriteBlock(
                        QuantIndex.Luminance,
                        prevDCY,
                        ref pixelConverter.YRight,
                        ref luminanceQuantTable,
                        ref unzig);
                }

                // Chroma blocks are written once both passes have been converted.
                prevDCCb = this.WriteBlock(
                    QuantIndex.Chrominance,
                    prevDCCb,
                    ref pixelConverter.Cb,
                    ref chrominanceQuantTable,
                    ref unzig);

                prevDCCr = this.WriteBlock(
                    QuantIndex.Chrominance,
                    prevDCCr,
                    ref pixelConverter.Cr,
                    ref chrominanceQuantTable,
                    ref unzig);
            }
        }

        this.FlushInternalBuffer();
    }

    /// <summary>
    /// Encodes the image with no chroma, just luminance.
    /// </summary>
    /// <typeparam name="TPixel">The pixel format.</typeparam>
    /// <param name="pixels">The image providing access to the pixels of its root frame.</param>
    /// <param name="luminanceQuantTable">Luminance quantization table provided by the caller</param>
    /// <param name="cancellationToken">The token to monitor for cancellation.</param>
    public void EncodeGrayscale<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, CancellationToken cancellationToken)
        where TPixel : unmanaged, IPixel<TPixel>
    {
        var unzig = ZigZag.CreateUnzigTable();

        // ReSharper disable once InconsistentNaming
        int prevDCY = 0;

        var pixelConverter = LuminanceForwardConverter<TPixel>.Create();
        ImageFrame<TPixel> frame = pixels.Frames.RootFrame;
        Buffer2D<TPixel> pixelBuffer = frame.PixelBuffer;
        RowOctet<TPixel> currentRows = default;

        for (int y = 0; y < pixels.Height; y += 8)
        {
            cancellationToken.ThrowIfCancellationRequested();
            currentRows.Update(pixelBuffer, y);

            for (int x = 0; x < pixels.Width; x += 8)
            {
                pixelConverter.Convert(frame, x, y, ref currentRows);

                prevDCY = this.WriteBlock(
                    QuantIndex.Luminance,
                    prevDCY,
                    ref pixelConverter.Y,
                    ref luminanceQuantTable,
                    ref unzig);
            }
        }

        this.FlushInternalBuffer();
    }

    /// <summary>
    /// Writes a block of pixel data using the given quantization table,
    /// returning the post-quantized DC value of the DCT-transformed block.
    /// The block is in natural (not zig-zag) order.
    /// </summary>
    /// <param name="index">The quantization table index.</param>
    /// <param name="prevDC">The previous DC value.</param>
    /// <param name="src">Source block</param>
    /// <param name="quant">Quantization table</param>
    /// <param name="unZig">The 8x8 Unzig block.</param>
    /// <returns>The quantized DC value of this block, to be passed as <paramref name="prevDC"/> for the next block of the same component.</returns>
    private int WriteBlock(
        QuantIndex index,
        int prevDC,
        ref Block8x8F src,
        ref Block8x8F quant,
        ref ZigZag unZig)
    {
        ref Block8x8F refTemp1 = ref this.temporalBlock1;
        ref Block8x8F refTemp2 = ref this.temporalBlock2;

        FastFloatingPointDCT.TransformFDCT(ref src, ref refTemp1, ref refTemp2);

        Block8x8F.Quantize(ref refTemp1, ref refTemp2, ref quant, ref unZig);

        int dc = (int)refTemp2[0];

        // Emit the DC delta.
        this.EmitHuffRLE((2 * (int)index) + 0, 0, dc - prevDC);

        // Emit the AC components.
        int h = (2 * (int)index) + 1;
        int runLength = 0;

        for (int zig = 1; zig < Block8x8F.Size; zig++)
        {
            int ac = (int)refTemp2[zig];

            if (ac == 0)
            {
                runLength++;
            }
            else
            {
                // A single symbol can describe at most 15 preceding zeros;
                // longer runs are split off with 0xf0 symbols of 16 zeros each.
                while (runLength > 15)
                {
                    this.EmitHuff(h, 0xf0);
                    runLength -= 16;
                }

                this.EmitHuffRLE(h, runLength, ac);
                runLength = 0;
            }
        }

        // Trailing zeros are signalled with the 0x00 symbol.
        if (runLength > 0)
        {
            this.EmitHuff(h, 0x00);
        }

        return dc;
    }

    /// <summary>
    /// Emits the least significant count of bits to the stream write buffer.
    /// The precondition is <c>bits &lt; (1 &lt;&lt; count) &amp;&amp; count &lt;= 16</c>.
    /// </summary>
    /// <param name="bits">The packed bits.</param>
    /// <param name="count">The number of bits</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    private void Emit(int bits, int count)
    {
        // Append the new bits right after the pending ones, keeping everything left-aligned.
        count += this.bitCount;
        bits <<= 32 - count;
        bits |= this.accumulatedBits;

        // Only write if at least 8 bits are available.
        if (count >= 8)
        {
            // Move every complete byte from the top of the accumulator into the buffer.
            while (count >= 8)
            {
                byte b = (byte)(bits >> 24);
                this.emitBuffer[this.emitLen++] = b;

                // A 0xFF data byte is followed by a stuffed zero byte.
                if (b == byte.MaxValue)
                {
                    this.emitBuffer[this.emitLen++] = byte.MinValue;
                }

                bits <<= 8;
                count -= 8;
            }

            // This can emit 4 times of:
            // 1 byte guaranteed
            // 1 extra byte.MinValue byte if previous one was byte.MaxValue
            // Thus writing (1 + 1) * 4 = 8 bytes max
            // So we must check if emit buffer has extra 8 bytes, if not - call stream.Write
            if (this.emitLen > EmitBufferSizeInBytes - 8)
            {
                this.target.Write(this.emitBuffer, 0, this.emitLen);
                this.emitLen = 0;
            }
        }

        this.accumulatedBits = bits;
        this.bitCount = count;
    }

    /// <summary>
    /// Emits the given value with the given Huffman encoder.
    /// </summary>
    /// <param name="index">The index of the Huffman encoder</param>
    /// <param name="value">The value to encode.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    private void EmitHuff(int index, int value)
    {
        // Table entries pack the code length in the top 8 bits and the code itself in the low 24 bits.
        int x = HuffmanLut.TheHuffmanLut[index].Values[value];
        this.Emit(x & ((1 << 24) - 1), x >> 24);
    }

    /// <summary>
    /// Emits a run-length/size symbol with the given Huffman encoder, followed by the bits of the value itself.
    /// </summary>
    /// <param name="index">The index of the Huffman encoder</param>
    /// <param name="runLength">The run length stored in the upper 4 bits of the emitted symbol.</param>
    /// <param name="value">The value to encode.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    private void EmitHuffRLE(int index, int runLength, int value)
    {
        int a = value;
        int b = value;

        // Negative values: the size comes from the magnitude, the emitted bits from (value - 1).
        if (a < 0)
        {
            a = -value;
            b = value - 1;
        }

        int bt = Numerics.MinimumBitsToStore16((uint)a);

        this.EmitHuff(index, (runLength << 4) | bt);

        if (bt > 0)
        {
            this.Emit(b & ((1 << bt) - 1), bt);
        }
    }

    /// <summary>
    /// Writes remaining bytes from internal buffer to the target stream.
    /// </summary>
    /// <remarks>
    /// Pads the last byte with 1's, but only when the bit stream is not already byte-aligned.
    /// </remarks>
    private void FlushInternalBuffer()
    {
        // Pad the last partial byte with 1's.
        // The pad size must be derived from the pending bit count: '8 - (bitCount % 8)' alone
        // is never zero, and would append a spurious 0xFF (plus its stuffed 0x00)
        // when the stream is already byte-aligned.
        int pendingBits = this.bitCount % 8;
        if (pendingBits != 0)
        {
            int padBitsCount = 8 - pendingBits;
            this.Emit((1 << padBitsCount) - 1, padBitsCount);
        }

        // Flush remaining bytes and mark the buffer as empty so they cannot be written twice.
        if (this.emitLen != 0)
        {
            this.target.Write(this.emitBuffer, 0, this.emitLen);
            this.emitLen = 0;
        }
    }
}
}

2
src/ImageSharp/Formats/Jpeg/Components/Encoder/LuminanceForwardConverter{TPixel}.cs

@ -49,7 +49,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
ref Block8x8F yBlock = ref this.Y;
ref L8 l8Start = ref l8Span[0];
for (int i = 0; i < 64; i++)
for (int i = 0; i < Block8x8F.Size; i++)
{
ref L8 c = ref Unsafe.Add(ref l8Start, i);
yBlock[i] = c.PackedValue;

148
src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterLut.cs

@ -92,48 +92,144 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
return tables;
}
/// <summary>
/// Optimized method to allocates the correct y, cb, and cr values to the DCT blocks from the given r, g, b values.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void ConvertPixelInto(
int r,
int g,
int b,
ref Block8x8F yResult,
ref Block8x8F cbResult,
ref Block8x8F crResult,
int i)
private float CalculateY(byte r, byte g, byte b)
{
// float y = (0.299F * r) + (0.587F * g) + (0.114F * b);
yResult[i] = (this.YRTable[r] + this.YGTable[g] + this.YBTable[b]) >> ScaleBits;
return (this.YRTable[r] + this.YGTable[g] + this.YBTable[b]) >> ScaleBits;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private float CalculateCb(byte r, byte g, byte b)
{
// float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b));
cbResult[i] = (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits;
return (this.CbRTable[r] + this.CbGTable[g] + this.CbBTable[b]) >> ScaleBits;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private float CalculateCr(byte r, byte g, byte b)
{
// float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b));
crResult[i] = (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits;
return (this.CbBTable[r] + this.CrGTable[g] + this.CrBTable[b]) >> ScaleBits;
}
public void Convert(Span<Rgb24> rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
/// <summary>
/// Converts Rgb24 pixels into YCbCr color space with 4:4:4 subsampling sampling of luminance and chroma.
/// </summary>
/// <param name="rgbSpan">Span of Rgb24 pixel data</param>
/// <param name="yBlock">Resulting Y values block</param>
/// <param name="cbBlock">Resulting Cb values block</param>
/// <param name="crBlock">Resulting Cr values block</param>
public void Convert444(Span<Rgb24> rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
{
ref Rgb24 rgbStart = ref rgbSpan[0];
for (int i = 0; i < 64; i++)
for (int i = 0; i < Block8x8F.Size; i++)
{
ref Rgb24 c = ref Unsafe.Add(ref rgbStart, i);
this.ConvertPixelInto(
c.R,
c.G,
c.B,
ref yBlock,
ref cbBlock,
ref crBlock,
i);
Rgb24 c = Unsafe.Add(ref rgbStart, i);
yBlock[i] = this.CalculateY(c.R, c.G, c.B);
cbBlock[i] = this.CalculateCb(c.R, c.G, c.B);
crBlock[i] = this.CalculateCr(c.R, c.G, c.B);
}
}
/// <summary>
/// Converts Rgb24 pixels into the YCbCr color space using 4:2:0 chroma subsampling.
/// </summary>
/// <remarks>
/// A single call fills 2 of the 4 luminance blocks and one half of each chroma block,
/// so it must be called twice per group of four 8x8 DCT blocks, once for each <paramref name="row"/> value.
/// </remarks>
/// <param name="rgbSpan">Span of Rgb24 pixel data</param>
/// <param name="yBlockLeft">First or "left" resulting Y block</param>
/// <param name="yBlockRight">Second or "right" resulting Y block</param>
/// <param name="cbBlock">Resulting Cb values block</param>
/// <param name="crBlock">Resulting Cr values block</param>
/// <param name="row">Row index of the 16x16 block, 0 or 1</param>
public void Convert420(Span<Rgb24> rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row)
{
    DebugGuard.MustBeBetweenOrEqualTo(row, 0, 1, nameof(row));

    ref float leftLuma = ref Unsafe.As<Block8x8F, float>(ref yBlockLeft);
    ref float rightLuma = ref Unsafe.As<Block8x8F, float>(ref yBlockRight);

    // Row 0 writes chroma elements 0-31 (upper half), row 1 writes elements 32-63 (lower half).
    int chromaHalfStart = row * (Block8x8F.Size / 2);
    ref float cbHalf = ref Unsafe.Add(ref Unsafe.As<Block8x8F, float>(ref cbBlock), chromaHalfStart);
    ref float crHalf = ref Unsafe.Add(ref Unsafe.As<Block8x8F, float>(ref crBlock), chromaHalfStart);

    ref Rgb24 firstPixel = ref rgbSpan[0];

    // Each step covers two pixel rows of 16 pixels, i.e. 32 pixels of input.
    for (int rowPair = 0; rowPair < 4; rowPair++)
    {
        int lumaOffset = rowPair * 16;
        int chromaOffset = rowPair * 8;
        ref Rgb24 leftPixels = ref Unsafe.Add(ref firstPixel, rowPair * 32);
        ref Rgb24 rightPixels = ref Unsafe.Add(ref leftPixels, 8);

        // Left half of the row pair.
        this.ConvertChunk420(
            ref leftPixels,
            ref Unsafe.Add(ref leftLuma, lumaOffset),
            ref Unsafe.Add(ref cbHalf, chromaOffset),
            ref Unsafe.Add(ref crHalf, chromaOffset));

        // Right half of the row pair; its chroma lands 4 elements further along the same chroma row.
        this.ConvertChunk420(
            ref rightPixels,
            ref Unsafe.Add(ref rightLuma, lumaOffset),
            ref Unsafe.Add(ref cbHalf, chromaOffset + 4),
            ref Unsafe.Add(ref crHalf, chromaOffset + 4));
    }
}
/// <summary>
/// Converts an 8 pixel wide, 2 pixel high chunk: writes 2 rows of 8 luminance values
/// and 4 chroma values, each averaged over a 2x2 pixel square.
/// </summary>
/// <param name="stride">First pixel of the chunk's top row; the bottom row starts 16 pixels later</param>
/// <param name="yBlock">First luminance value to write; the second row is written 8 floats later</param>
/// <param name="cbBlock">First of the 4 Cb values to write</param>
/// <param name="crBlock">First of the 4 Cr values to write</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void ConvertChunk420(ref Rgb24 stride, ref float yBlock, ref float cbBlock, ref float crBlock)
{
    // Source rows are 16 pixels wide (two 8x8 blocks side by side),
    // destination luminance rows are 8 values wide.
    const int sourceRowLength = 16;
    const int lumaRowLength = 8;

    for (int column = 0, chromaIndex = 0; column < 8; column += 2, chromaIndex++)
    {
        // 2x2 pixel square
        Rgb24 topLeft = Unsafe.Add(ref stride, column);
        Rgb24 topRight = Unsafe.Add(ref stride, column + 1);
        Rgb24 bottomLeft = Unsafe.Add(ref stride, column + sourceRowLength);
        Rgb24 bottomRight = Unsafe.Add(ref stride, column + sourceRowLength + 1);

        // luminance is kept at full resolution
        ref float luma = ref Unsafe.Add(ref yBlock, column);
        luma = this.CalculateY(topLeft.R, topLeft.G, topLeft.B);
        Unsafe.Add(ref luma, 1) = this.CalculateY(topRight.R, topRight.G, topRight.B);
        Unsafe.Add(ref luma, lumaRowLength) = this.CalculateY(bottomLeft.R, bottomLeft.G, bottomLeft.B);
        Unsafe.Add(ref luma, lumaRowLength + 1) = this.CalculateY(bottomRight.R, bottomRight.G, bottomRight.B);

        // one chroma sample per 2x2 square
        Unsafe.Add(ref cbBlock, chromaIndex) = this.CalculateAverageCb(topLeft, topRight, bottomLeft, bottomRight);
        Unsafe.Add(ref crBlock, chromaIndex) = this.CalculateAverageCr(topLeft, topRight, bottomLeft, bottomRight);
    }
}
/// <summary>
/// Calculates the mean Cb value of four pixels.
/// </summary>
/// <returns>A quarter of the sum of the four individual Cb values.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private float CalculateAverageCb(Rgb24 px0, Rgb24 px1, Rgb24 px2, Rgb24 px3)
{
    float cb0 = this.CalculateCb(px0.R, px0.G, px0.B);
    float cb1 = this.CalculateCb(px1.R, px1.G, px1.B);
    float cb2 = this.CalculateCb(px2.R, px2.G, px2.B);
    float cb3 = this.CalculateCb(px3.R, px3.G, px3.B);

    return 0.25f * (cb0 + cb1 + cb2 + cb3);
}
/// <summary>
/// Calculates the mean Cr value of four pixels.
/// </summary>
/// <returns>A quarter of the sum of the four individual Cr values.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private float CalculateAverageCr(Rgb24 px0, Rgb24 px1, Rgb24 px2, Rgb24 px3)
{
    float cr0 = this.CalculateCr(px0.R, px0.G, px0.B);
    float cr1 = this.CalculateCr(px1.R, px1.G, px1.B);
    float cr2 = this.CalculateCr(px2.R, px2.G, px2.B);
    float cr3 = this.CalculateCr(px3.R, px3.G, px3.B);

    return 0.25f * (cr0 + cr1 + cr2 + cr3);
}
/// <summary>
/// Converts a floating point coefficient to fixed point: multiplies by 2^ScaleBits,
/// adds 0.5 and truncates to <see cref="int"/>.
/// </summary>
/// <param name="x">The coefficient to convert.</param>
/// <returns>The fixed-point representation of <paramref name="x"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int Fix(float x)
{
    return (int)((x * (1L << ScaleBits)) + 0.5F);
}

195
src/ImageSharp/Formats/Jpeg/Components/Encoder/RgbToYCbCrConverterVectorized.cs

@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
@ -27,19 +27,45 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
}
}
/// <summary>
/// Gets the number of bytes that must be appended to an rgb byte buffer
/// so that the vectorized converters never read outside of it.
/// </summary>
/// <remarks>
/// An rgb byte matrix holds 8 strides of 8 pixels, stored back to back: 64 * 3 = 192 bytes.
/// One stride is 3 * 8 = 24 bytes (192 bits), but it is loaded into a 256 bit Avx register,
/// so every load also picks up 64 bits belonging to the following stride:
/// <code>
/// stride 0     0 -  192 -(+64bits)->  256
/// stride 1   192 -  384 -(+64bits)->  448
/// stride 2   384 -  576 -(+64bits)->  640
/// stride 3   576 -  768 -(+64bits)->  832
/// stride 4   768 -  960 -(+64bits)-> 1024
/// stride 5   960 - 1152 -(+64bits)-> 1216
/// stride 6  1152 - 1344 -(+64bits)-> 1408
/// stride 7  1344 - 1536 -(+64bits)-> 1600  (read access violation)
/// </code>
/// The 64 pixel span is 64 * 3 * 8 = 1536 bits long while the last load reaches bit 1600,
/// i.e. it would touch foreign memory. Padding the buffer with 8 bytes avoids that
/// without any special casing inside the converters.
/// </remarks>
public static int AvxCompatibilityPadding
{
    get
    {
#if SUPPORTS_RUNTIME_INTRINSICS
        return IsSupported ? 8 : 0;
#else
        return 0;
#endif
    }
}
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// Permutation control for <c>Avx2.PermuteVar8x32</c>, read by the converters as eight 32-bit indices:
/// { 0, 1, 2, 6, 3, 4, 5, 7 }.
/// Source dwords 0-2 (bytes 0-11) end up in the lower 128-bit half and dwords 3-5 (bytes 12-23)
/// in the upper half, so the first 24 bytes of a loaded vector are split evenly between the halves.
/// Dwords 6 and 7 only fill the remaining slot of each half.
/// </summary>
private static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[]
{
0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0,
3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0
};
/// <summary>
/// Permutation control for <c>Avx2.PermuteVar8x32</c>, read as eight 32-bit indices:
/// { 2, 3, 4, 0, 5, 6, 7, 1 }.
/// Source dwords 2-4 (bytes 8-19) end up in the lower 128-bit half and dwords 5-7 (bytes 20-31)
/// in the upper half, so the last 24 bytes of a loaded vector are split evenly between the halves.
/// Dwords 0 and 1 only fill the remaining slot of each half.
/// </summary>
private static ReadOnlySpan<byte> MoveLast24BytesToSeparateLanes => new byte[]
{
2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0,
5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 0
};
private static ReadOnlySpan<byte> ExtractRgb => new byte[]
{
0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF,
@ -47,7 +73,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
};
#endif
public static void Convert(ReadOnlySpan<Rgb24> rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
/// <summary>
/// Converts 8x8 Rgb24 pixel matrix to YCbCr pixel matrices with 4:4:4 subsampling
/// </summary>
/// <remarks>Total size of rgb span must be 200 bytes</remarks>
/// <param name="rgbSpan">Span of rgb pixels with size of 64</param>
/// <param name="yBlock">8x8 destination matrix of Luminance(Y) converted data</param>
/// <param name="cbBlock">8x8 destination matrix of Chrominance(Cb) converted data</param>
/// <param name="crBlock">8x8 destination matrix of Chrominance(Cr) converted data</param>
public static void Convert444(ReadOnlySpan<Rgb24> rgbSpan, ref Block8x8F yBlock, ref Block8x8F cbBlock, ref Block8x8F crBlock)
{
Debug.Assert(IsSupported, "AVX2 is required to run this converter");
@ -63,18 +97,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
var f05 = Vector256.Create(0.5f);
var zero = Vector256.Create(0).AsByte();
ref Vector256<byte> inRef = ref Unsafe.As<Rgb24, Vector256<byte>>(ref MemoryMarshal.GetReference(rgbSpan));
ref Vector256<float> destYRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref yBlock);
ref Vector256<float> destCbRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref cbBlock);
ref Vector256<float> destCrRef = ref Unsafe.As<Block8x8F, Vector256<float>>(ref crBlock);
ref Vector256<byte> rgbByteSpan = ref Unsafe.As<Rgb24, Vector256<byte>>(ref MemoryMarshal.GetReference(rgbSpan));
ref Vector256<float> destYRef = ref yBlock.V0;
ref Vector256<float> destCbRef = ref cbBlock.V0;
ref Vector256<float> destCrRef = ref crBlock.V0;
var extractToLanesMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes));
var extractRgbMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ExtractRgb));
Vector256<byte> rgb, rg, bx;
Vector256<float> r, g, b;
for (int i = 0; i < 7; i++)
const int bytesPerRgbStride = 24;
for (int i = 0; i < 8; i++)
{
rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref inRef, (IntPtr)(24 * i)).AsUInt32(), extractToLanesMask).AsByte();
rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * i)).AsUInt32(), extractToLanesMask).AsByte();
rgb = Avx2.Shuffle(rgb, extractRgbMask);
@ -94,27 +130,130 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
// 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b))
Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r));
}
#endif
}
/// <summary>
/// Converts a 16x8 Rgb24 pixel matrix to two 8x8 Y matrices and one half of the
/// Cb and Cr 8x8 matrices, using 4:2:0 subsampling.
/// </summary>
/// <remarks>
/// Every 8 pixel stride (24 bytes) is loaded as a whole 256-bit vector, therefore the last load
/// reads 8 bytes past the 384 bytes of pixel data. The memory behind <paramref name="rgbSpan"/>
/// must be readable for that many extra bytes (see <see cref="AvxCompatibilityPadding"/>).
/// </remarks>
/// <param name="rgbSpan">16x8 pixels stored as rows of 16 pixels</param>
/// <param name="yBlockLeft">8x8 destination matrix of Luminance(Y) for the left 8 columns</param>
/// <param name="yBlockRight">8x8 destination matrix of Luminance(Y) for the right 8 columns</param>
/// <param name="cbBlock">8x8 destination matrix of Chrominance(Cb); four of its rows are written</param>
/// <param name="crBlock">8x8 destination matrix of Chrominance(Cr); four of its rows are written</param>
/// <param name="row">Selects the written chroma rows: they start at vector row <c>row * 4</c></param>
public static void Convert420(ReadOnlySpan<Rgb24> rgbSpan, ref Block8x8F yBlockLeft, ref Block8x8F yBlockRight, ref Block8x8F cbBlock, ref Block8x8F crBlock, int row)
{
    Debug.Assert(IsSupported, "AVX2 is required to run this converter");

#if SUPPORTS_RUNTIME_INTRINSICS
    var f0299 = Vector256.Create(0.299f);
    var f0587 = Vector256.Create(0.587f);
    var f0114 = Vector256.Create(0.114f);
    var fn0168736 = Vector256.Create(-0.168736f);
    var fn0331264 = Vector256.Create(-0.331264f);
    var f128 = Vector256.Create(128f);
    var fn0418688 = Vector256.Create(-0.418688f);
    var fn0081312F = Vector256.Create(-0.081312F);
    var f05 = Vector256.Create(0.5f);
    var zero = Vector256.Create(0).AsByte();

    ref Vector256<byte> rgbByteSpan = ref Unsafe.As<Rgb24, Vector256<byte>>(ref MemoryMarshal.GetReference(rgbSpan));

    // Each call produces four chroma rows; row selects which group of four is written.
    int destOffset = row * 4;

    ref Vector256<float> destCbRef = ref Unsafe.Add(ref Unsafe.As<Block8x8F, Vector256<float>>(ref cbBlock), destOffset);
    ref Vector256<float> destCrRef = ref Unsafe.Add(ref Unsafe.As<Block8x8F, Vector256<float>>(ref crBlock), destOffset);

    var extractToLanesMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes));
    var extractRgbMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ExtractRgb));
    Vector256<byte> rgb, rg, bx;
    Vector256<float> r, g, b;

    // r/g/b values of the four strides forming one 16x2 pixel area, indexed by stride:
    // 0 = top left, 1 = top right, 2 = bottom left, 3 = bottom right
    Span<Vector256<float>> rDataLanes = stackalloc Vector256<float>[4];
    Span<Vector256<float>> gDataLanes = stackalloc Vector256<float>[4];
    Span<Vector256<float>> bDataLanes = stackalloc Vector256<float>[4];

    const int bytesPerRgbStride = 24;
    for (int i = 0; i < 4; i++)
    {
        // 16x2 => 8x1
        // left 8x8 column conversions
        for (int j = 0; j < 4; j += 2)
        {
            rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte();

            rgb = Avx2.Shuffle(rgb, extractRgbMask);

            rg = Avx2.UnpackLow(rgb, zero);
            bx = Avx2.UnpackHigh(rgb, zero);

            r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
            g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
            b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());

            // j == 0 is the upper, j == 2 the lower pixel row of the current row pair
            int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1);

            // (0.299F * r) + (0.587F * g) + (0.114F * b);
            Unsafe.Add(ref yBlockLeft.V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r);

            rDataLanes[j] = r;
            gDataLanes[j] = g;
            bDataLanes[j] = b;
        }

        // 16x2 => 8x1
        // right 8x8 column conversions
        for (int j = 1; j < 4; j += 2)
        {
            rgb = Avx2.PermuteVar8x32(Unsafe.AddByteOffset(ref rgbByteSpan, (IntPtr)(bytesPerRgbStride * (i * 4 + j))).AsUInt32(), extractToLanesMask).AsByte();

            rgb = Avx2.Shuffle(rgb, extractRgbMask);

            rg = Avx2.UnpackLow(rgb, zero);
            bx = Avx2.UnpackHigh(rgb, zero);

            r = Avx.ConvertToVector256Single(Avx2.UnpackLow(rg, zero).AsInt32());
            g = Avx.ConvertToVector256Single(Avx2.UnpackHigh(rg, zero).AsInt32());
            b = Avx.ConvertToVector256Single(Avx2.UnpackLow(bx, zero).AsInt32());

            // j == 1 is the upper, j == 3 the lower pixel row of the current row pair
            int yBlockVerticalOffset = (i * 2) + ((j & 2) >> 1);

            // (0.299F * r) + (0.587F * g) + (0.114F * b);
            Unsafe.Add(ref yBlockRight.V0, yBlockVerticalOffset) = SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f0114, b), f0587, g), f0299, r);

            rDataLanes[j] = r;
            gDataLanes[j] = g;
            bDataLanes[j] = b;
        }

        // Chroma is computed from the 2x2 averaged r/g/b values of the 16x2 area.
        r = Scale16x2_8x1(rDataLanes);
        g = Scale16x2_8x1(gDataLanes);
        b = Scale16x2_8x1(bDataLanes);

        // 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b))
        Unsafe.Add(ref destCbRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(f05, b), fn0331264, g), fn0168736, r));

        // 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b))
        Unsafe.Add(ref destCrRef, i) = Avx.Add(f128, SimdUtils.HwIntrinsics.MultiplyAdd(SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(fn0081312F, b), fn0418688, g), f05, r));
    }
#endif
}
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// Downscales a 16x2 matrix to 8x1 by averaging every 2x2 square.
/// </summary>
/// <param name="v">
/// The input matrix as 4 256bit vectors: [0] and [1] are added to [2] and [3] respectively
/// before neighbouring elements are combined.
/// </param>
/// <returns>256bit vector holding the averages derived from [0]/[2] in its lower half and from [1]/[3] in its upper half</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector256<float> Scale16x2_8x1(ReadOnlySpan<Vector256<float>> v)
{
    Debug.Assert(Avx2.IsSupported, "AVX2 is required to run this converter");
    DebugGuard.IsTrue(v.Length == 4, "Input span must consist of 4 elements");

    // vertical sums of the two 8x2 halves
    Vector256<float> leftColumnSums = Avx.Add(v[0], v[2]);
    Vector256<float> rightColumnSums = Avx.Add(v[1], v[3]);

    // HorizontalAdd sums neighbouring pairs but interleaves both operands per 128-bit lane
    Vector256<float> pairSums = Avx.HorizontalAdd(leftColumnSums, rightColumnSums);
    Vector256<float> averages = Avx.Multiply(pairSums, Vector256.Create(0.25f));

    // swap the two middle 64-bit chunks to undo the interleaving
    Vector256<double> reordered = Avx2.Permute4x64(averages.AsDouble(), 0b11_01_10_00);
    return reordered.AsSingle();
}
#endif
}
}

121
src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter420{TPixel}.cs

@ -0,0 +1,121 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
/// <summary>
/// On-stack worker struct encapsulating the TPixel -> Rgb24 -> YCbCr conversion chain
/// for 16x8 pixel samples with 4:2:0 chroma subsampling.
/// </summary>
/// <typeparam name="TPixel">The pixel type to work on</typeparam>
internal ref struct YCbCrForwardConverter420<TPixel>
    where TPixel : unmanaged, IPixel<TPixel>
{
    /// <summary>
    /// Number of pixels processed by one <see cref="Convert(int, int, ref RowOctet{TPixel}, int)"/> call
    /// </summary>
    private const int PixelsPerSample = 16 * 8;

    /// <summary>
    /// Byte size of one sample after conversion from TPixel to <see cref="Rgb24"/>
    /// </summary>
    private const int RgbSpanByteSize = PixelsPerSample * 3;

    /// <summary>
    /// <see cref="Size"/> of the area sampled from the frame pixel buffer per call
    /// </summary>
    private static readonly Size SampleSize = new Size(16, 8);

    /// <summary>
    /// The left Y component
    /// </summary>
    public Block8x8F YLeft;

    /// <summary>
    /// The right Y component
    /// </summary>
    public Block8x8F YRight;

    /// <summary>
    /// The Cb component
    /// </summary>
    public Block8x8F Cb;

    /// <summary>
    /// The Cr component
    /// </summary>
    public Block8x8F Cr;

    /// <summary>
    /// The color conversion tables, only created when the vectorized converter is not supported
    /// </summary>
    private RgbToYCbCrConverterLut colorTables;

    /// <summary>
    /// Temporary 16x8 buffer holding the sampled TPixel data
    /// </summary>
    private Span<TPixel> pixelSpan;

    /// <summary>
    /// Temporary buffer holding the sample converted to Rgb24
    /// </summary>
    private Span<Rgb24> rgbSpan;

    /// <summary>
    /// Size of the frame the samples are taken from
    /// </summary>
    private Size samplingAreaSize;

    /// <summary>
    /// <see cref="Configuration"/> for internal operations
    /// </summary>
    private Configuration config;

    /// <summary>
    /// Initializes a new instance of the <see cref="YCbCrForwardConverter420{TPixel}"/> struct.
    /// </summary>
    /// <param name="frame">The frame providing the dimensions and the configuration</param>
    public YCbCrForwardConverter420(ImageFrame<TPixel> frame)
    {
        // the result blocks are filled by Convert
        this.YLeft = default;
        this.YRight = default;
        this.Cb = default;
        this.Cr = default;

        // temporary pixel buffers; the rgb buffer carries the padding required by the vectorized converter
        byte[] rgbBytes = new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxCompatibilityPadding];
        this.pixelSpan = new TPixel[PixelsPerSample];
        this.rgbSpan = MemoryMarshal.Cast<byte, Rgb24>(rgbBytes.AsSpan());

        // frame data
        this.samplingAreaSize = new Size(frame.Width, frame.Height);
        this.config = frame.GetConfiguration();

        // lookup tables are only needed by the non-vectorized fallback
        this.colorTables = RgbToYCbCrConverterVectorized.IsSupported
            ? default(RgbToYCbCrConverterLut)
            : RgbToYCbCrConverterLut.Create();
    }

    /// <summary>
    /// Converts the 16x8 image area at position (x,y), placing the result into
    /// <see cref="YLeft"/>, <see cref="YRight"/>, <see cref="Cb"/> and <see cref="Cr"/>.
    /// </summary>
    /// <param name="x">Horizontal start of the sampled area</param>
    /// <param name="y">Vertical start of the sampled area</param>
    /// <param name="currentRows">The rows the pixels are loaded from</param>
    /// <param name="idx">Passed to the converters as their row argument</param>
    public void Convert(int x, int y, ref RowOctet<TPixel> currentRows, int idx)
    {
        YCbCrForwardConverter<TPixel>.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), SampleSize, this.samplingAreaSize);

        PixelOperations<TPixel>.Instance.ToRgb24(this.config, this.pixelSpan, this.rgbSpan);

        if (!RgbToYCbCrConverterVectorized.IsSupported)
        {
            this.colorTables.Convert420(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx);
            return;
        }

        RgbToYCbCrConverterVectorized.Convert420(this.rgbSpan, ref this.YLeft, ref this.YRight, ref this.Cb, ref this.Cr, idx);
    }
}
}

122
src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter444{TPixel}.cs

@ -0,0 +1,122 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
/// <summary>
/// On-stack worker struct encapsulating the TPixel -> Rgb24 -> YCbCr conversion chain
/// for 8x8 pixel blocks with 4:4:4 sampling.
/// </summary>
/// <typeparam name="TPixel">The pixel type to work on</typeparam>
internal ref struct YCbCrForwardConverter444<TPixel>
    where TPixel : unmanaged, IPixel<TPixel>
{
    /// <summary>
    /// Number of pixels processed by one <see cref="Convert(int, int, ref RowOctet{TPixel})"/> call
    /// </summary>
    private const int PixelsPerSample = 8 * 8;

    /// <summary>
    /// Byte size of one sample after conversion from TPixel to <see cref="Rgb24"/>
    /// </summary>
    private const int RgbSpanByteSize = PixelsPerSample * 3;

    /// <summary>
    /// <see cref="Size"/> of the area sampled from the frame pixel buffer per call
    /// </summary>
    private static readonly Size SampleSize = new Size(8, 8);

    /// <summary>
    /// The Y component
    /// </summary>
    public Block8x8F Y;

    /// <summary>
    /// The Cb component
    /// </summary>
    public Block8x8F Cb;

    /// <summary>
    /// The Cr component
    /// </summary>
    public Block8x8F Cr;

    /// <summary>
    /// The color conversion tables, only created when the vectorized converter is not supported
    /// </summary>
    private RgbToYCbCrConverterLut colorTables;

    /// <summary>
    /// Temporary 64 pixel buffer holding the unconverted TPixel data
    /// </summary>
    private Span<TPixel> pixelSpan;

    /// <summary>
    /// Temporary buffer holding the sample converted to Rgb24
    /// </summary>
    private Span<Rgb24> rgbSpan;

    /// <summary>
    /// Size of the frame the samples are taken from
    /// </summary>
    private Size samplingAreaSize;

    /// <summary>
    /// <see cref="Configuration"/> for internal operations
    /// </summary>
    private Configuration config;

    /// <summary>
    /// Initializes a new instance of the <see cref="YCbCrForwardConverter444{TPixel}"/> struct.
    /// </summary>
    /// <param name="frame">The frame providing the dimensions and the configuration</param>
    public YCbCrForwardConverter444(ImageFrame<TPixel> frame)
    {
        // the result blocks are filled by Convert
        this.Y = default;
        this.Cb = default;
        this.Cr = default;

        // temporary pixel buffers; the rgb buffer carries the padding required by the vectorized converter
        byte[] rgbBytes = new byte[RgbSpanByteSize + RgbToYCbCrConverterVectorized.AvxCompatibilityPadding];
        this.pixelSpan = new TPixel[PixelsPerSample];
        this.rgbSpan = MemoryMarshal.Cast<byte, Rgb24>(rgbBytes.AsSpan());

        // frame data
        this.samplingAreaSize = new Size(frame.Width, frame.Height);
        this.config = frame.GetConfiguration();

        // lookup tables are only needed by the non-vectorized fallback
        this.colorTables = RgbToYCbCrConverterVectorized.IsSupported
            ? default(RgbToYCbCrConverterLut)
            : RgbToYCbCrConverterLut.Create();
    }

    /// <summary>
    /// Converts the 8x8 image area at position (x,y), placing the result into
    /// <see cref="Y"/>, <see cref="Cb"/> and <see cref="Cr"/>.
    /// </summary>
    /// <param name="x">Horizontal start of the sampled area</param>
    /// <param name="y">Vertical start of the sampled area</param>
    /// <param name="currentRows">The rows the pixels are loaded from</param>
    public void Convert(int x, int y, ref RowOctet<TPixel> currentRows)
    {
        YCbCrForwardConverter<TPixel>.LoadAndStretchEdges(currentRows, this.pixelSpan, new Point(x, y), SampleSize, this.samplingAreaSize);

        PixelOperations<TPixel>.Instance.ToRgb24(this.config, this.pixelSpan, this.rgbSpan);

        if (!RgbToYCbCrConverterVectorized.IsSupported)
        {
            this.colorTables.Convert444(this.rgbSpan, ref this.Y, ref this.Cb, ref this.Cr);
            return;
        }

        RgbToYCbCrConverterVectorized.Convert444(this.rgbSpan, ref this.Y, ref this.Cb, ref this.Cr);
    }
}
}

90
src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs

@ -2,81 +2,59 @@
// Licensed under the Apache License, Version 2.0.
using System;
using SixLabors.ImageSharp.Advanced;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
/// <summary>
/// Helper shared by the YCbCr forward converters: loads a rectangular pixel sample
/// and pads it when the sample reaches beyond the image.
/// </summary>
/// <typeparam name="TPixel">The pixel type to work on</typeparam>
internal static class YCbCrForwardConverter<TPixel>
    where TPixel : unmanaged, IPixel<TPixel>
{
    /// <summary>
    /// Copies a <paramref name="sampleSize"/> area starting at <paramref name="start"/> into
    /// <paramref name="dest"/>. Where the area exceeds <paramref name="totalSize"/>, the last
    /// available pixel of each row is repeated to the right and the last available row is repeated downwards.
    /// </summary>
    /// <param name="source">The rows to read from; indexed from 0 for the first sampled row</param>
    /// <param name="dest">Destination holding sampleSize.Width * sampleSize.Height pixels, row by row</param>
    /// <param name="start">Sample origin; X is used as column index into each source row</param>
    /// <param name="sampleSize">Size of the sample to produce</param>
    /// <param name="totalSize">Size of the area the sample is clipped against</param>
    public static void LoadAndStretchEdges(RowOctet<TPixel> source, Span<TPixel> dest, Point start, Size sampleSize, Size totalSize)
    {
        // The origin (0, 0) is a valid sample position, so the lower bound is 0.
        DebugGuard.MustBeBetweenOrEqualTo(start.X, 0, totalSize.Width - 1, nameof(start.X));
        DebugGuard.MustBeBetweenOrEqualTo(start.Y, 0, totalSize.Height - 1, nameof(start.Y));

        // part of the sample that is actually covered by pixel data
        int width = Math.Min(sampleSize.Width, totalSize.Width - start.X);
        int height = Math.Min(sampleSize.Height, totalSize.Height - start.Y);

        uint byteWidth = (uint)(width * Unsafe.SizeOf<TPixel>());
        int remainderXCount = sampleSize.Width - width;

        ref byte blockStart = ref MemoryMarshal.GetReference(MemoryMarshal.Cast<TPixel, byte>(dest));
        int rowSizeInBytes = sampleSize.Width * Unsafe.SizeOf<TPixel>();

        for (int y = 0; y < height; y++)
        {
            Span<TPixel> row = source[y];

            ref byte s = ref Unsafe.As<TPixel, byte>(ref row[start.X]);
            ref byte d = ref Unsafe.Add(ref blockStart, y * rowSizeInBytes);

            Unsafe.CopyBlock(ref d, ref s, byteWidth);

            // stretch the last copied pixel over the missing columns
            ref TPixel last = ref Unsafe.Add(ref Unsafe.As<byte, TPixel>(ref d), width - 1);

            for (int x = 1; x <= remainderXCount; x++)
            {
                Unsafe.Add(ref last, x) = last;
            }
        }

        int remainderYCount = sampleSize.Height - height;

        if (remainderYCount == 0)
        {
            return;
        }

        // stretch the last (already horizontally padded) row over the missing rows
        ref byte lastRowStart = ref Unsafe.Add(ref blockStart, (height - 1) * rowSizeInBytes);

        for (int y = 1; y <= remainderYCount; y++)
        {
            ref byte remStart = ref Unsafe.Add(ref lastRowStart, rowSizeInBytes * y);
            Unsafe.CopyBlock(ref remStart, ref lastRowStart, (uint)rowSizeInBytes);
        }
    }
}

463
src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs

@ -1,8 +1,13 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
// ReSharper disable InconsistentNaming
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
@ -10,7 +15,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <summary>
/// Contains inaccurate, but fast forward and inverse DCT implementations.
/// </summary>
internal static class FastFloatingPointDCT
internal static partial class FastFloatingPointDCT
{
#pragma warning disable SA1310 // FieldNamesMustNotContainUnderscore
private const float C_1_175876 = 1.175875602f;
@ -38,147 +43,31 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
private const float C_0_765367 = 0.765366865f;
private const float C_0_125 = 0.1250f;
#if SUPPORTS_RUNTIME_INTRINSICS
// Coefficients broadcast to all 8 lanes of a 256-bit vector.
// FDCT8x8_Avx uses C_V_0_5411, C_V_1_3065, C_V_1_1758, C_V_0_7856, C_V_1_3870, C_V_0_2758 and C_V_InvSqrt2.
// NOTE(review): the remaining values mirror the scalar C_* constants and are not referenced
// in the visible part of this file - presumably they serve the IDCT path; confirm.
private static readonly Vector256<float> C_V_0_5411 = Vector256.Create(0.541196f);
private static readonly Vector256<float> C_V_1_3065 = Vector256.Create(1.306563f);
private static readonly Vector256<float> C_V_1_1758 = Vector256.Create(1.175876f);
private static readonly Vector256<float> C_V_0_7856 = Vector256.Create(0.785695f);
private static readonly Vector256<float> C_V_1_3870 = Vector256.Create(1.387040f);
private static readonly Vector256<float> C_V_0_2758 = Vector256.Create(0.275899f);
private static readonly Vector256<float> C_V_n1_9615 = Vector256.Create(-1.961570560f);
private static readonly Vector256<float> C_V_n0_3901 = Vector256.Create(-0.390180644f);
private static readonly Vector256<float> C_V_n0_8999 = Vector256.Create(-0.899976223f);
private static readonly Vector256<float> C_V_n2_5629 = Vector256.Create(-2.562915447f);
private static readonly Vector256<float> C_V_0_2986 = Vector256.Create(0.298631336f);
private static readonly Vector256<float> C_V_2_0531 = Vector256.Create(2.053119869f);
private static readonly Vector256<float> C_V_3_0727 = Vector256.Create(3.072711026f);
private static readonly Vector256<float> C_V_1_5013 = Vector256.Create(1.501321110f);
private static readonly Vector256<float> C_V_n1_8477 = Vector256.Create(-1.847759065f);
private static readonly Vector256<float> C_V_0_7653 = Vector256.Create(0.765366865f);

// readonly like every other constant above: the value is never reassigned.
private static readonly Vector256<float> C_V_InvSqrt2 = Vector256.Create(0.707107f);
#endif
#pragma warning restore SA1310 // FieldNamesMustNotContainUnderscore
private static readonly Vector4 InvSqrt2 = new Vector4(0.707107f);
/// <summary>
/// Apply floating point IDCT transformation into dest, using a temporary block 'temp' provided by the caller (optimization).
/// Ported from https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L239
/// </summary>
/// <remarks>
/// The transform runs as two identical passes (left and right 8x4 halves), each preceded by a transposition.
/// The content of <paramref name="temp"/> is overwritten; <paramref name="src"/> is only read.
/// </remarks>
/// <param name="src">Source</param>
/// <param name="dest">Destination</param>
/// <param name="temp">Temporary block provided by the caller</param>
public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp)
{
// first pass: works on the transposed source
src.TransposeInto(ref temp);
IDCT8x4_LeftPart(ref temp, ref dest);
IDCT8x4_RightPart(ref temp, ref dest);
// second pass: works on the transposed result of the first pass
dest.TransposeInto(ref temp);
IDCT8x4_LeftPart(ref temp, ref dest);
IDCT8x4_RightPart(ref temp, ref dest);
// TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing?
// final scaling of every element by 1/8
dest.MultiplyInPlace(C_0_125);
}
/// <summary>
/// Do IDCT internal operations on the left part of the block. Original src:
/// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261
/// </summary>
/// <remarks>
/// Reads the left halves (V0L..V7L) of all 8 rows of <paramref name="s"/> and writes
/// the left halves of all 8 rows of <paramref name="d"/>.
/// </remarks>
/// <param name="s">The source block</param>
/// <param name="d">Destination block</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void IDCT8x4_LeftPart(ref Block8x8F s, ref Block8x8F d)
{
// odd rows (1, 3, 5, 7) are combined into mb0..mb3
Vector4 my1 = s.V1L;
Vector4 my7 = s.V7L;
Vector4 mz0 = my1 + my7;
Vector4 my3 = s.V3L;
Vector4 mz2 = my3 + my7;
Vector4 my5 = s.V5L;
Vector4 mz1 = my3 + my5;
Vector4 mz3 = my1 + my5;
Vector4 mz4 = (mz0 + mz1) * C_1_175876;
mz2 = (mz2 * C_1_961571) + mz4;
mz3 = (mz3 * C_0_390181) + mz4;
mz0 = mz0 * C_0_899976;
mz1 = mz1 * C_2_562915;
Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2;
Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3;
Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2;
Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3;
// even rows (0, 2, 4, 6) are combined into my0..my3; the my/mz locals are reused from here on
Vector4 my2 = s.V2L;
Vector4 my6 = s.V6L;
mz4 = (my2 + my6) * C_0_541196;
Vector4 my0 = s.V0L;
Vector4 my4 = s.V4L;
mz0 = my0 + my4;
mz1 = my0 - my4;
mz2 = mz4 + (my6 * C_1_847759);
mz3 = mz4 + (my2 * C_0_765367);
my0 = mz0 + mz3;
my3 = mz0 - mz3;
my1 = mz1 + mz2;
my2 = mz1 - mz2;
// output rows k and 7 - k are the sum and the difference of the even and odd results
d.V0L = my0 + mb0;
d.V7L = my0 - mb0;
d.V1L = my1 + mb1;
d.V6L = my1 - mb1;
d.V2L = my2 + mb2;
d.V5L = my2 - mb2;
d.V3L = my3 + mb3;
d.V4L = my3 - mb3;
}
/// <summary>
/// Do IDCT internal operations on the right part of the block.
/// Original src:
/// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261
/// </summary>
/// <remarks>
/// Reads the right halves (V0R..V7R) of all 8 rows of <paramref name="s"/> and writes
/// the right halves of all 8 rows of <paramref name="d"/>.
/// </remarks>
/// <param name="s">The source block</param>
/// <param name="d">The destination block</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void IDCT8x4_RightPart(ref Block8x8F s, ref Block8x8F d)
{
// odd rows (1, 3, 5, 7) are combined into mb0..mb3
Vector4 my1 = s.V1R;
Vector4 my7 = s.V7R;
Vector4 mz0 = my1 + my7;
Vector4 my3 = s.V3R;
Vector4 mz2 = my3 + my7;
Vector4 my5 = s.V5R;
Vector4 mz1 = my3 + my5;
Vector4 mz3 = my1 + my5;
Vector4 mz4 = (mz0 + mz1) * C_1_175876;
mz2 = (mz2 * C_1_961571) + mz4;
mz3 = (mz3 * C_0_390181) + mz4;
mz0 = mz0 * C_0_899976;
mz1 = mz1 * C_2_562915;
Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2;
Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3;
Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2;
Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3;
// even rows (0, 2, 4, 6) are combined into my0..my3; the my/mz locals are reused from here on
Vector4 my2 = s.V2R;
Vector4 my6 = s.V6R;
mz4 = (my2 + my6) * C_0_541196;
Vector4 my0 = s.V0R;
Vector4 my4 = s.V4R;
mz0 = my0 + my4;
mz1 = my0 - my4;
mz2 = mz4 + (my6 * C_1_847759);
mz3 = mz4 + (my2 * C_0_765367);
my0 = mz0 + mz3;
my3 = mz0 - mz3;
my1 = mz1 + mz2;
my2 = mz1 - mz2;
// output rows k and 7 - k are the sum and the difference of the even and odd results
d.V0R = my0 + mb0;
d.V7R = my0 - mb0;
d.V1R = my1 + mb1;
d.V6R = my1 - mb1;
d.V2R = my2 + mb2;
d.V5R = my2 - mb2;
d.V3R = my3 + mb3;
d.V4R = my3 - mb3;
}
/// <summary>
/// Original:
/// <see>
@ -309,11 +198,84 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
/// <summary>
/// Combined operation of <see cref="FDCT8x4_LeftPart(ref Block8x8F, ref Block8x8F)"/> and <see cref="FDCT8x4_RightPart(ref Block8x8F, ref Block8x8F)"/>
/// using AVX commands.
/// </summary>
/// <remarks>
/// Processes whole 8-float rows (V0..V7) at once. All 8 rows of <paramref name="d"/> are written,
/// <paramref name="s"/> is only read. Without SUPPORTS_RUNTIME_INTRINSICS the method body is empty.
/// </remarks>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void FDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
Debug.Assert(Avx.IsSupported, "AVX is required to execute this method");
// sums and differences of the mirrored row pairs (0,7), (1,6), (2,5), (3,4)
Vector256<float> t0 = Avx.Add(s.V0, s.V7);
Vector256<float> t7 = Avx.Subtract(s.V0, s.V7);
Vector256<float> t1 = Avx.Add(s.V1, s.V6);
Vector256<float> t6 = Avx.Subtract(s.V1, s.V6);
Vector256<float> t2 = Avx.Add(s.V2, s.V5);
Vector256<float> t5 = Avx.Subtract(s.V2, s.V5);
Vector256<float> t3 = Avx.Add(s.V3, s.V4);
Vector256<float> t4 = Avx.Subtract(s.V3, s.V4);
// even output rows are derived from the sums
Vector256<float> c0 = Avx.Add(t0, t3);
Vector256<float> c1 = Avx.Add(t1, t2);
// 0 4
d.V0 = Avx.Add(c0, c1);
d.V4 = Avx.Subtract(c0, c1);
Vector256<float> c3 = Avx.Subtract(t0, t3);
Vector256<float> c2 = Avx.Subtract(t1, t2);
// 2 6
d.V2 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(c2, C_V_0_5411), c3, C_V_1_3065);
d.V6 = SimdUtils.HwIntrinsics.MultiplySubstract(Avx.Multiply(c2, C_V_1_3065), c3, C_V_0_5411);
// odd output rows are derived from the differences; c0..c3 are reused as temporaries from here on
c3 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(t4, C_V_1_1758), t7, C_V_0_7856);
c0 = SimdUtils.HwIntrinsics.MultiplySubstract(Avx.Multiply(t4, C_V_0_7856), t7, C_V_1_1758);
c2 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(t5, C_V_1_3870), C_V_0_2758, t6);
c1 = SimdUtils.HwIntrinsics.MultiplySubstract(Avx.Multiply(C_V_0_2758, t5), t6, C_V_1_3870);
// 3 5
d.V3 = Avx.Subtract(c0, c2);
d.V5 = Avx.Subtract(c3, c1);
c0 = Avx.Multiply(Avx.Add(c0, c2), C_V_InvSqrt2);
c3 = Avx.Multiply(Avx.Add(c3, c1), C_V_InvSqrt2);
// 1 7
d.V1 = Avx.Add(c0, c3);
d.V7 = Avx.Subtract(c0, c3);
#endif
}
/// <summary>
/// Applies a single forward DCT pass to an 8x8 block.
/// Uses <see cref="FDCT8x8_Avx(ref Block8x8F, ref Block8x8F)"/> when runtime intrinsics are compiled in
/// and AVX is supported; otherwise processes the left and right halves with the <see cref="Vector4"/> routines.
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void FDCT8x8(ref Block8x8F s, ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx.IsSupported)
    {
        FDCT8x8_Avx(ref s, ref d);
        return;
    }
#endif

    // Fallback: the two half-block routines together cover the whole block.
    FDCT8x4_LeftPart(ref s, ref d);
    FDCT8x4_RightPart(ref s, ref d);
}
/// <summary>
/// Apply floating point FDCT from src into dest
/// </summary>
/// <param name="src">Source</param>
/// <param name="dest">Destination</param>
/// <param name="temp">Temporary block provided by the caller</param>
/// <param name="temp">Temporary block provided by the caller for optimization</param>
/// <param name="offsetSourceByNeg128">If true, a constant -128.0 offset is applied for all values before FDCT </param>
public static void TransformFDCT(
ref Block8x8F src,
@ -327,14 +289,225 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
temp.AddInPlace(-128F);
}
FDCT8x4_LeftPart(ref temp, ref dest);
FDCT8x4_RightPart(ref temp, ref dest);
FDCT8x8(ref temp, ref dest);
dest.TransposeInto(ref temp);
FDCT8x4_LeftPart(ref temp, ref dest);
FDCT8x4_RightPart(ref temp, ref dest);
FDCT8x8(ref temp, ref dest);
dest.MultiplyInPlace(C_0_125);
}
/// <summary>
/// Applies a single inverse DCT pass to an 8x8 block.
/// Uses <see cref="IDCT8x8_Avx(ref Block8x8F, ref Block8x8F)"/> when runtime intrinsics are compiled in
/// and AVX is supported; otherwise processes the left and right halves with the <see cref="Vector4"/> routines.
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void IDCT8x8(ref Block8x8F s, ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx.IsSupported)
    {
        IDCT8x8_Avx(ref s, ref d);
        return;
    }
#endif

    // Fallback: the two half-block routines together cover the whole block.
    IDCT8x4_LeftPart(ref s, ref d);
    IDCT8x4_RightPart(ref s, ref d);
}
/// <summary>
/// Runs one IDCT pass over the left-hand <see cref="Vector4"/> of each of the eight
/// vectors of <paramref name="s"/> (<c>V0L</c>..<c>V7L</c>) and stores the result in the
/// matching <c>V0L</c>..<c>V7L</c> fields of <paramref name="d"/>.
/// Original src:
/// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261
/// </summary>
/// <param name="s">The source block; only its <c>V*L</c> fields are read.</param>
/// <param name="d">Destination block; only its <c>V*L</c> fields are written.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void IDCT8x4_LeftPart(ref Block8x8F s, ref Block8x8F d)
{
// Inputs 1, 3, 5 and 7 are combined into the four terms mb0..mb3.
Vector4 my1 = s.V1L;
Vector4 my7 = s.V7L;
Vector4 mz0 = my1 + my7;
Vector4 my3 = s.V3L;
Vector4 mz2 = my3 + my7;
Vector4 my5 = s.V5L;
Vector4 mz1 = my3 + my5;
Vector4 mz3 = my1 + my5;
Vector4 mz4 = (mz0 + mz1) * C_1_175876;
mz2 = (mz2 * C_1_961571) + mz4;
mz3 = (mz3 * C_0_390181) + mz4;
mz0 = mz0 * C_0_899976;
mz1 = mz1 * C_2_562915;
Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2;
Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3;
Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2;
Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3;
// Inputs 0, 2, 4 and 6 are combined into my0..my3; the mz*/my* locals are reused from here on.
Vector4 my2 = s.V2L;
Vector4 my6 = s.V6L;
mz4 = (my2 + my6) * C_0_541196;
Vector4 my0 = s.V0L;
Vector4 my4 = s.V4L;
mz0 = my0 + my4;
mz1 = my0 - my4;
mz2 = mz4 + (my6 * C_1_847759);
mz3 = mz4 + (my2 * C_0_765367);
my0 = mz0 + mz3;
my3 = mz0 - mz3;
my1 = mz1 + mz2;
my2 = mz1 - mz2;
// Outputs k and 7 - k are the sum and the difference of my_k and mb_k.
d.V0L = my0 + mb0;
d.V7L = my0 - mb0;
d.V1L = my1 + mb1;
d.V6L = my1 - mb1;
d.V2L = my2 + mb2;
d.V5L = my2 - mb2;
d.V3L = my3 + mb3;
d.V4L = my3 - mb3;
}
/// <summary>
/// Runs one IDCT pass over the right-hand <see cref="Vector4"/> of each of the eight
/// vectors of <paramref name="s"/> (<c>V0R</c>..<c>V7R</c>) and stores the result in the
/// matching <c>V0R</c>..<c>V7R</c> fields of <paramref name="d"/>.
/// Original src:
/// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261
/// </summary>
/// <param name="s">The source block; only its <c>V*R</c> fields are read.</param>
/// <param name="d">The destination block; only its <c>V*R</c> fields are written.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void IDCT8x4_RightPart(ref Block8x8F s, ref Block8x8F d)
{
// Inputs 1, 3, 5 and 7 are combined into the four terms mb0..mb3.
Vector4 my1 = s.V1R;
Vector4 my7 = s.V7R;
Vector4 mz0 = my1 + my7;
Vector4 my3 = s.V3R;
Vector4 mz2 = my3 + my7;
Vector4 my5 = s.V5R;
Vector4 mz1 = my3 + my5;
Vector4 mz3 = my1 + my5;
Vector4 mz4 = (mz0 + mz1) * C_1_175876;
mz2 = (mz2 * C_1_961571) + mz4;
mz3 = (mz3 * C_0_390181) + mz4;
mz0 = mz0 * C_0_899976;
mz1 = mz1 * C_2_562915;
Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2;
Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3;
Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2;
Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3;
// Inputs 0, 2, 4 and 6 are combined into my0..my3; the mz*/my* locals are reused from here on.
Vector4 my2 = s.V2R;
Vector4 my6 = s.V6R;
mz4 = (my2 + my6) * C_0_541196;
Vector4 my0 = s.V0R;
Vector4 my4 = s.V4R;
mz0 = my0 + my4;
mz1 = my0 - my4;
mz2 = mz4 + (my6 * C_1_847759);
mz3 = mz4 + (my2 * C_0_765367);
my0 = mz0 + mz3;
my3 = mz0 - mz3;
my1 = mz1 + mz2;
my2 = mz1 - mz2;
// Outputs k and 7 - k are the sum and the difference of my_k and mb_k.
d.V0R = my0 + mb0;
d.V7R = my0 - mb0;
d.V1R = my1 + mb1;
d.V6R = my1 - mb1;
d.V2R = my2 + mb2;
d.V5R = my2 - mb2;
d.V3R = my3 + mb3;
d.V4R = my3 - mb3;
}
/// <summary>
/// Combined operation of <see cref="IDCT8x4_LeftPart(ref Block8x8F, ref Block8x8F)"/> and <see cref="IDCT8x4_RightPart(ref Block8x8F, ref Block8x8F)"/>
/// using AVX commands.
/// </summary>
/// <remarks>
/// The whole body is wrapped in <c>#if SUPPORTS_RUNTIME_INTRINSICS</c>; when that symbol is not
/// defined this method does nothing. AVX support is only checked with a debug assertion.
/// </remarks>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void IDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
Debug.Assert(Avx.IsSupported, "AVX is required to execute this method");
// Input vectors 1, 3, 5 and 7 are combined into the four terms mb0..mb3.
Vector256<float> my1 = s.V1;
Vector256<float> my7 = s.V7;
Vector256<float> mz0 = Avx.Add(my1, my7);
Vector256<float> my3 = s.V3;
Vector256<float> mz2 = Avx.Add(my3, my7);
Vector256<float> my5 = s.V5;
Vector256<float> mz1 = Avx.Add(my3, my5);
Vector256<float> mz3 = Avx.Add(my1, my5);
Vector256<float> mz4 = Avx.Multiply(Avx.Add(mz0, mz1), C_V_1_1758);
mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz2, C_V_n1_9615);
mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz3, C_V_n0_3901);
mz0 = Avx.Multiply(mz0, C_V_n0_8999);
mz1 = Avx.Multiply(mz1, C_V_n2_5629);
Vector256<float> mb3 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my7, C_V_0_2986), mz2);
Vector256<float> mb2 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my5, C_V_2_0531), mz3);
Vector256<float> mb1 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my3, C_V_3_0727), mz2);
Vector256<float> mb0 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my1, C_V_1_5013), mz3);
// Input vectors 0, 2, 4 and 6 are combined into my0..my3; the mz*/my* locals are reused from here on.
Vector256<float> my2 = s.V2;
Vector256<float> my6 = s.V6;
mz4 = Avx.Multiply(Avx.Add(my2, my6), C_V_0_5411);
Vector256<float> my0 = s.V0;
Vector256<float> my4 = s.V4;
mz0 = Avx.Add(my0, my4);
mz1 = Avx.Subtract(my0, my4);
mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my6, C_V_n1_8477);
mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my2, C_V_0_7653);
my0 = Avx.Add(mz0, mz3);
my3 = Avx.Subtract(mz0, mz3);
my1 = Avx.Add(mz1, mz2);
my2 = Avx.Subtract(mz1, mz2);
// Output vectors k and 7 - k are the sum and the difference of my_k and mb_k.
d.V0 = Avx.Add(my0, mb0);
d.V7 = Avx.Subtract(my0, mb0);
d.V1 = Avx.Add(my1, mb1);
d.V6 = Avx.Subtract(my1, mb1);
d.V2 = Avx.Add(my2, mb2);
d.V5 = Avx.Subtract(my2, mb2);
d.V3 = Avx.Add(my3, mb3);
d.V4 = Avx.Subtract(my3, mb3);
#endif
}
/// <summary>
/// Apply floating point IDCT transformation into dest, using a temporary block 'temp' provided by the caller (optimization).
/// The transform is done as two <see cref="IDCT8x8(ref Block8x8F, ref Block8x8F)"/> passes, each preceded by a transpose,
/// followed by a multiplication of the result by <c>C_0_125</c>.
/// Ported from https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L239
/// </summary>
/// <param name="src">Source</param>
/// <param name="dest">Destination; also used to hold the intermediate result of the first pass.</param>
/// <param name="temp">Temporary block provided by the caller; overwritten with the transposed input of each pass.</param>
public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp)
{
// First pass: transposed source -> dest.
src.TransposeInto(ref temp);
IDCT8x8(ref temp, ref dest);
// Second pass: transposed intermediate result -> dest.
dest.TransposeInto(ref temp);
IDCT8x8(ref temp, ref dest);
// TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing?
dest.MultiplyInPlace(C_0_125);
}
}

642
src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs

@ -5,14 +5,11 @@ using System;
using System.Buffers.Binary;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;
using SixLabors.ImageSharp.Common.Helpers;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder;
using SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.Metadata;
using SixLabors.ImageSharp.Metadata.Profiles.Exif;
using SixLabors.ImageSharp.Metadata.Profiles.Icc;
@ -32,20 +29,47 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
private const int QuantizationTableCount = 2;
/// <summary>
/// A scratch buffer to reduce allocations.
/// Gets the unscaled quantization tables in zig-zag order. Each
/// encoder copies and scales the tables according to its quality parameter.
/// The values are derived from section K.1 after converting from natural to
/// zig-zag order.
/// </summary>
private readonly byte[] buffer = new byte[20];
// 64 unscaled luminance quantization values; InitQuantizationTable reads this table for quantization index 0.
// The C# compiler emits this as a compile-time constant embedded in the PE file.
// This is effectively compiled down to: return new ReadOnlySpan<byte>(&data, length)
// More details can be found: https://github.com/dotnet/roslyn/pull/24621
private static ReadOnlySpan<byte> UnscaledQuant_Luminance => new byte[]
{
// Luminance.
16, 11, 12, 14, 12, 10, 16, 14, 13, 14, 18, 17, 16, 19, 24,
40, 26, 24, 22, 22, 24, 49, 35, 37, 29, 40, 58, 51, 61, 60,
57, 51, 56, 55, 64, 72, 92, 78, 64, 68, 87, 69, 55, 56, 80,
109, 81, 87, 95, 98, 103, 104, 103, 62, 77, 113, 121, 112,
100, 120, 92, 101, 103, 99,
};
/// <summary>
/// A buffer for reducing the number of stream writes when emitting Huffman tables. 64 seems to be enough.
/// Gets the unscaled quantization tables in zig-zag order. Each
/// encoder copies and scales the tables according to its quality parameter.
/// The values are derived from section K.1 after converting from natural to
/// zig-zag order.
/// </summary>
private readonly byte[] emitBuffer = new byte[64];
// 64 unscaled chrominance quantization values; InitQuantizationTable reads this table for quantization index 1.
// The C# compiler emits this as a compile-time constant embedded in the PE file.
// This is effectively compiled down to: return new ReadOnlySpan<byte>(&data, length)
// More details can be found: https://github.com/dotnet/roslyn/pull/24621
private static ReadOnlySpan<byte> UnscaledQuant_Chrominance => new byte[]
{
// Chrominance.
17, 18, 18, 24, 21, 24, 47, 26, 26, 47, 99, 66, 56, 66,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
};
/// <summary>
/// A buffer for reducing the number of stream writes when emitting Huffman tables. Max combined table lengths +
/// identifier.
/// A scratch buffer to reduce allocations.
/// </summary>
private readonly byte[] huffmanBuffer = new byte[179];
private readonly byte[] buffer = new byte[20];
/// <summary>
/// Gets or sets the subsampling method to use.
@ -62,26 +86,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
/// </summary>
private readonly JpegColorType? colorType;
/// <summary>
/// The accumulated bits to write to the stream.
/// </summary>
private uint accumulatedBits;
/// <summary>
/// The accumulated bit count.
/// </summary>
private uint bitCount;
/// <summary>
/// The scaled chrominance table, in zig-zag order.
/// </summary>
private Block8x8F chrominanceQuantTable;
/// <summary>
/// The scaled luminance table, in zig-zag order.
/// </summary>
private Block8x8F luminanceQuantTable;
/// <summary>
/// The output stream. All attempted writes after the first error become no-ops.
/// </summary>
@ -98,67 +102,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
this.colorType = options.ColorType;
}
/// <summary>
/// Gets a 256-entry lookup table that maps a byte value to the number of bits
/// needed to hold it (0 for 0, 1 for 1, 2 for 2..3, and so on up to 8 for 128..255).
/// </summary>
// The C# compiler emits this as a compile-time constant embedded in the PE file.
// This is effectively compiled down to: return new ReadOnlySpan<byte>(&data, length)
// More details can be found: https://github.com/dotnet/roslyn/pull/24621
private static ReadOnlySpan<byte> BitCountLut => new byte[]
{
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8,
};
/// <summary>
/// Gets the unscaled luminance quantization table (64 values) in zig-zag order. Each
/// encoder copies and scales the table according to its quality parameter.
/// The values are derived from section K.1 after converting from natural to
/// zig-zag order.
/// </summary>
// The C# compiler emits this as a compile-time constant embedded in the PE file.
// This is effectively compiled down to: return new ReadOnlySpan<byte>(&data, length)
// More details can be found: https://github.com/dotnet/roslyn/pull/24621
private static ReadOnlySpan<byte> UnscaledQuant_Luminance => new byte[]
{
// Luminance.
16, 11, 12, 14, 12, 10, 16, 14, 13, 14, 18, 17, 16, 19, 24,
40, 26, 24, 22, 22, 24, 49, 35, 37, 29, 40, 58, 51, 61, 60,
57, 51, 56, 55, 64, 72, 92, 78, 64, 68, 87, 69, 55, 56, 80,
109, 81, 87, 95, 98, 103, 104, 103, 62, 77, 113, 121, 112,
100, 120, 92, 101, 103, 99,
};
/// <summary>
/// Gets the unscaled chrominance quantization table (64 values) in zig-zag order. Each
/// encoder copies and scales the table according to its quality parameter.
/// The values are derived from section K.1 after converting from natural to
/// zig-zag order.
/// </summary>
// The C# compiler emits this as a compile-time constant embedded in the PE file.
// This is effectively compiled down to: return new ReadOnlySpan<byte>(&data, length)
// More details can be found: https://github.com/dotnet/roslyn/pull/24621
private static ReadOnlySpan<byte> UnscaledQuant_Chrominance => new byte[]
{
// Chrominance.
17, 18, 18, 24, 21, 24, 47, 26, 26, 47, 99, 66, 56, 66,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
};
/// <summary>
/// Encode writes the image to the jpeg baseline format with the given options.
/// </summary>
@ -171,14 +114,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
{
Guard.NotNull(image, nameof(image));
Guard.NotNull(stream, nameof(stream));
cancellationToken.ThrowIfCancellationRequested();
const ushort max = JpegConstants.MaxLength;
if (image.Width >= max || image.Height >= max)
if (image.Width >= JpegConstants.MaxLength || image.Height >= JpegConstants.MaxLength)
{
throw new ImageFormatException($"Image is too large to encode at {image.Width}x{image.Height}.");
JpegThrowHelper.ThrowDimensionsTooLarge(image.Width, image.Height);
}
cancellationToken.ThrowIfCancellationRequested();
this.outputStream = stream;
ImageMetadata metadata = image.Metadata;
@ -201,10 +144,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
}
// Initialize the quantization tables.
InitQuantizationTable(0, scale, ref this.luminanceQuantTable);
// TODO: This looks ugly. Should we write the chrominance table for luminance-only images?
// If not, this code can be simplified.
Block8x8F luminanceQuantTable = default;
Block8x8F chrominanceQuantTable = default;
InitQuantizationTable(0, scale, ref luminanceQuantTable);
if (componentCount > 1)
{
InitQuantizationTable(1, scale, ref this.chrominanceQuantTable);
InitQuantizationTable(1, scale, ref chrominanceQuantTable);
}
// Write the Start Of Image marker.
@ -214,7 +161,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
this.WriteProfiles(metadata);
// Write the quantization tables.
this.WriteDefineQuantizationTables();
this.WriteDefineQuantizationTables(ref luminanceQuantTable, ref chrominanceQuantTable);
// Write the image dimensions.
this.WriteStartOfFrame(image.Width, image.Height, componentCount);
@ -222,13 +169,31 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
// Write the Huffman tables.
this.WriteDefineHuffmanTables(componentCount);
// Write the image data.
// Write the scan header.
this.WriteStartOfScan(image, componentCount, cancellationToken);
// Write the scan compressed data.
var scanEncoder = new HuffmanScanEncoder(stream);
if (this.colorType == JpegColorType.Luminance)
{
scanEncoder.EncodeGrayscale(image, ref luminanceQuantTable, cancellationToken);
}
else
{
switch (subsample)
{
case JpegSubsample.Ratio444:
scanEncoder.Encode444(image, ref luminanceQuantTable, ref chrominanceQuantTable, cancellationToken);
break;
case JpegSubsample.Ratio420:
scanEncoder.Encode420(image, ref luminanceQuantTable, ref chrominanceQuantTable, cancellationToken);
break;
}
}
// Write the End Of Image marker.
this.buffer[0] = JpegConstants.Markers.XFF;
this.buffer[1] = JpegConstants.Markers.EOI;
stream.Write(this.buffer, 0, 2);
this.WriteEndOfImageMarker();
stream.Flush();
}
@ -248,248 +213,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
}
}
/// <summary>
/// Fills <paramref name="quant"/> with the unscaled table selected by <paramref name="i"/>,
/// scaling each entry by <paramref name="scale"/> percent (rounded) and limiting it to the range 1..255.
/// </summary>
/// <param name="i">The quantization index: 0 selects the luminance table, 1 the chrominance table.</param>
/// <param name="scale">The scaling factor, applied as a percentage.</param>
/// <param name="quant">The quantization table to fill.</param>
private static void InitQuantizationTable(int i, int scale, ref Block8x8F quant)
{
    DebugGuard.MustBeBetweenOrEqualTo(i, 0, 1, nameof(i));

    ReadOnlySpan<byte> baseTable = UnscaledQuant_Luminance;
    if (i != 0)
    {
        baseTable = UnscaledQuant_Chrominance;
    }

    for (int j = 0; j < Block8x8F.Size; j++)
    {
        // Adding 50 before the division by 100 rounds to the nearest integer.
        int scaled = ((baseTable[j] * scale) + 50) / 100;
        quant[j] = scaled < 1 ? 1 : (scaled > 255 ? 255 : scaled);
    }
}
/// <summary>
/// Appends the <paramref name="count"/> least significant bits of <paramref name="bits"/> to the
/// pending bit accumulator and writes every completed byte to the output stream.
/// A zero byte is inserted after each 0xFF byte that is written.
/// The documented precondition is
/// <example>
/// bits &lt; 1&lt;&lt;nBits &amp;&amp; nBits &lt;= 16
/// </example>
/// .
/// </summary>
/// <param name="bits">The packed bits.</param>
/// <param name="count">The number of bits</param>
/// <param name="emitBufferBase">
/// The reference to the emitBuffer. Completed bytes are staged through this reference and then
/// flushed from the <c>emitBuffer</c> array, so it is expected to point at that array's first element.
/// </param>
[MethodImpl(InliningOptions.ShortMethod)]
private void Emit(uint bits, uint count, ref byte emitBufferBase)
{
    uint pendingCount = this.bitCount + count;

    // New bits are placed directly below the bits that are already pending at the top of the accumulator.
    uint pendingBits = (bits << (int)(32 - pendingCount)) | this.accumulatedBits;

    int staged = 0;
    for (; pendingCount >= 8; pendingCount -= 8)
    {
        byte next = (byte)(pendingBits >> 24);
        Unsafe.Add(ref emitBufferBase, staged++) = next;
        if (next == byte.MaxValue)
        {
            Unsafe.Add(ref emitBufferBase, staged++) = byte.MinValue;
        }

        pendingBits <<= 8;
    }

    // Only touch the stream when at least one whole byte was completed.
    if (staged > 0)
    {
        this.outputStream.Write(this.emitBuffer, 0, staged);
    }

    this.accumulatedBits = pendingBits;
    this.bitCount = pendingCount;
}
/// <summary>
/// Looks up the packed Huffman entry for <paramref name="value"/> in the table selected by
/// <paramref name="index"/> and emits it: the low 24 bits of the entry are the bits to write,
/// the high 8 bits are the number of bits.
/// </summary>
/// <param name="index">The index of the Huffman encoder</param>
/// <param name="value">The value to encode.</param>
/// <param name="emitBufferBase">The reference to the emit buffer.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void EmitHuff(HuffIndex index, int value, ref byte emitBufferBase)
{
    uint packed = HuffmanLut.TheHuffmanLut[(int)index].Values[value];
    uint code = packed & ((1 << 24) - 1);
    uint length = packed >> 24;
    this.Emit(code, length, ref emitBufferBase);
}
/// <summary>
/// Emits the Huffman code for the symbol <c>(runLength &lt;&lt; 4) | bitLength</c>, where
/// <c>bitLength</c> is the number of bits needed for the magnitude of <paramref name="value"/>,
/// followed by the low <c>bitLength</c> bits of the value (of <c>value - 1</c> when the value is negative).
/// </summary>
/// <param name="index">The index of the Huffman encoder</param>
/// <param name="runLength">The run length stored in the upper nibble of the emitted symbol.</param>
/// <param name="value">The value to encode.</param>
/// <param name="emitBufferBase">The reference to the emit buffer.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void EmitHuffRLE(HuffIndex index, int runLength, int value, ref byte emitBufferBase)
{
    int magnitude = value;
    int payload = value;
    if (value < 0)
    {
        magnitude = -value;
        payload = value - 1;
    }

    // The lookup table only covers one byte, so larger magnitudes are measured on their high byte.
    uint bitLength = magnitude < 0x100
        ? BitCountLut[magnitude]
        : 8 + (uint)BitCountLut[magnitude >> 8];

    int symbol = (int)((uint)(runLength << 4) | bitLength);
    this.EmitHuff(index, symbol, ref emitBufferBase);

    if (bitLength > 0)
    {
        uint mask = (uint)((1 << ((int)bitLength)) - 1);
        this.Emit((uint)payload & mask, bitLength, ref emitBufferBase);
    }
}
/// <summary>
/// Encodes the image without chroma subsampling: for every 8x8 pixel block a Y, a Cb and a Cr block
/// are written, in that order.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="pixels">The pixel accessor providing access to the image pixels.</param>
/// <param name="cancellationToken">The token to monitor for cancellation; checked once per row of blocks.</param>
/// <param name="emitBufferBase">The reference to the emit buffer.</param>
private void Encode444<TPixel>(Image<TPixel> pixels, CancellationToken cancellationToken, ref byte emitBufferBase)
    where TPixel : unmanaged, IPixel<TPixel>
{
    // TODO: Need a JpegScanEncoder<TPixel> class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.)
    // (Partially done with YCbCrForwardConverter<TPixel>)
    Block8x8F fdctBlock = default;
    Block8x8F quantizedBlock = default;

    // Local copies of the quantization tables.
    Block8x8F lumaQuant = this.luminanceQuantTable;
    Block8x8F chromaQuant = this.chrominanceQuantTable;

    ZigZag unzig = ZigZag.CreateUnzigTable();

    // The DC value of the previous block, tracked per component.
    int dcY = 0;
    int dcCb = 0;
    int dcCr = 0;

    var converter = YCbCrForwardConverter<TPixel>.Create();
    ImageFrame<TPixel> frame = pixels.Frames.RootFrame;
    Buffer2D<TPixel> pixelBuffer = frame.PixelBuffer;
    RowOctet<TPixel> rows = default;

    for (int y = 0; y < pixels.Height; y += 8)
    {
        cancellationToken.ThrowIfCancellationRequested();
        rows.Update(pixelBuffer, y);

        for (int x = 0; x < pixels.Width; x += 8)
        {
            converter.Convert(frame, x, y, ref rows);

            dcY = this.WriteBlock(QuantIndex.Luminance, dcY, ref converter.Y, ref fdctBlock, ref quantizedBlock, ref lumaQuant, ref unzig, ref emitBufferBase);
            dcCb = this.WriteBlock(QuantIndex.Chrominance, dcCb, ref converter.Cb, ref fdctBlock, ref quantizedBlock, ref chromaQuant, ref unzig, ref emitBufferBase);
            dcCr = this.WriteBlock(QuantIndex.Chrominance, dcCr, ref converter.Cr, ref fdctBlock, ref quantizedBlock, ref chromaQuant, ref unzig, ref emitBufferBase);
        }
    }
}
/// <summary>
/// Encodes the image as luminance only: a single Y block is written for every 8x8 pixel block.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="pixels">The pixel accessor providing access to the image pixels.</param>
/// <param name="cancellationToken">The token to monitor for cancellation; checked once per row of blocks.</param>
/// <param name="emitBufferBase">The reference to the emit buffer.</param>
private void EncodeGrayscale<TPixel>(Image<TPixel> pixels, CancellationToken cancellationToken, ref byte emitBufferBase)
    where TPixel : unmanaged, IPixel<TPixel>
{
    // TODO: Need a JpegScanEncoder<TPixel> class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.)
    // (Partially done with YCbCrForwardConverter<TPixel>)
    Block8x8F fdctBlock = default;
    Block8x8F quantizedBlock = default;

    // Local copy of the quantization table.
    Block8x8F lumaQuant = this.luminanceQuantTable;

    ZigZag unzig = ZigZag.CreateUnzigTable();

    // The DC value of the previous block.
    int dcY = 0;

    var converter = LuminanceForwardConverter<TPixel>.Create();
    ImageFrame<TPixel> frame = pixels.Frames.RootFrame;
    Buffer2D<TPixel> pixelBuffer = frame.PixelBuffer;
    RowOctet<TPixel> rows = default;

    for (int y = 0; y < pixels.Height; y += 8)
    {
        cancellationToken.ThrowIfCancellationRequested();
        rows.Update(pixelBuffer, y);

        for (int x = 0; x < pixels.Width; x += 8)
        {
            converter.Convert(frame, x, y, ref rows);
            dcY = this.WriteBlock(QuantIndex.Luminance, dcY, ref converter.Y, ref fdctBlock, ref quantizedBlock, ref lumaQuant, ref unzig, ref emitBufferBase);
        }
    }
}
/// <summary>
/// Writes the application header containing the JFIF identifier plus extra data.
/// </summary>
@ -539,72 +262,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
this.outputStream.Write(this.buffer, 0, 20);
}
/// <summary>
/// Transforms, quantizes and entropy-codes one block of pixel data using the given quantization table,
/// returning the post-quantized DC value of the DCT-transformed block.
/// The block is in natural (not zig-zag) order.
/// </summary>
/// <param name="index">The quantization table index; it also selects the DC and AC Huffman tables.</param>
/// <param name="prevDC">The previous DC value; only the difference to it is emitted.</param>
/// <param name="src">Source block</param>
/// <param name="tempDest1">Temporal block to be used as FDCT Destination</param>
/// <param name="tempDest2">Temporal block 2; holds the quantized coefficients after the call.</param>
/// <param name="quant">Quantization table</param>
/// <param name="unZig">The 8x8 Unzig block.</param>
/// <param name="emitBufferBase">The reference to the emit buffer.</param>
/// <returns>The quantized DC value of this block.</returns>
private int WriteBlock(
    QuantIndex index,
    int prevDC,
    ref Block8x8F src,
    ref Block8x8F tempDest1,
    ref Block8x8F tempDest2,
    ref Block8x8F quant,
    ref ZigZag unZig,
    ref byte emitBufferBase)
{
    FastFloatingPointDCT.TransformFDCT(ref src, ref tempDest1, ref tempDest2);
    Block8x8F.Quantize(ref tempDest1, ref tempDest2, ref quant, ref unZig);

    // Each quantization index owns two consecutive Huffman tables: DC first, then AC.
    var dcTable = (HuffIndex)((2 * (int)index) + 0);
    var acTable = (HuffIndex)((2 * (int)index) + 1);

    // Emit the DC delta.
    int dc = (int)tempDest2[0];
    this.EmitHuffRLE(dcTable, 0, dc - prevDC, ref emitBufferBase);

    // Emit the AC components.
    int zeroRun = 0;
    for (int zig = 1; zig < Block8x8F.Size; zig++)
    {
        int ac = (int)tempDest2[zig];
        if (ac == 0)
        {
            zeroRun++;
            continue;
        }

        // A run longer than 15 zeros is split into 0xf0 symbols, each standing for 16 zeros.
        for (; zeroRun > 15; zeroRun -= 16)
        {
            this.EmitHuff(acTable, 0xf0, ref emitBufferBase);
        }

        this.EmitHuffRLE(acTable, zeroRun, ac, ref emitBufferBase);
        zeroRun = 0;
    }

    // Trailing zeros are signalled with the 0x00 symbol instead of being written out.
    if (zeroRun > 0)
    {
        this.EmitHuff(acTable, 0x00, ref emitBufferBase);
    }

    return dc;
}
/// <summary>
/// Writes the Define Huffman Table marker and tables.
/// </summary>
@ -638,34 +295,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
this.WriteMarkerHeader(JpegConstants.Markers.DHT, markerlen);
for (int i = 0; i < specs.Length; i++)
{
ref HuffmanSpec spec = ref specs[i];
int len = 0;
fixed (byte* huffman = this.huffmanBuffer)
fixed (byte* count = spec.Count)
fixed (byte* values = spec.Values)
{
huffman[len++] = headers[i];
for (int c = 0; c < spec.Count.Length; c++)
{
huffman[len++] = count[c];
}
for (int v = 0; v < spec.Values.Length; v++)
{
huffman[len++] = values[v];
}
}
this.outputStream.Write(this.huffmanBuffer, 0, len);
this.outputStream.WriteByte(headers[i]);
this.outputStream.Write(specs[i].Count);
this.outputStream.Write(specs[i].Values);
}
}
/// <summary>
/// Writes the Define Quantization Marker and tables.
/// </summary>
private void WriteDefineQuantizationTables()
private void WriteDefineQuantizationTables(ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable)
{
// Marker + quantization table lengths
int markerlen = 2 + (QuantizationTableCount * (1 + Block8x8F.Size));
@ -677,8 +316,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
byte[] dqt = new byte[dqtCount];
int offset = 0;
WriteDataToDqt(dqt, ref offset, QuantIndex.Luminance, ref this.luminanceQuantTable);
WriteDataToDqt(dqt, ref offset, QuantIndex.Chrominance, ref this.chrominanceQuantTable);
WriteDataToDqt(dqt, ref offset, QuantIndex.Luminance, ref luminanceQuantTable);
WriteDataToDqt(dqt, ref offset, QuantIndex.Chrominance, ref chrominanceQuantTable);
this.outputStream.Write(dqt, 0, dqtCount);
}
@ -982,7 +621,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
private void WriteStartOfScan<TPixel>(Image<TPixel> image, int componentCount, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
// TODO: Need a JpegScanEncoder<TPixel> class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.)
Span<byte> componentId = stackalloc byte[]
{
0x01,
@ -1024,111 +662,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
this.buffer[sosSize] = 0x3f; // Se - End of spectral selection.
this.buffer[sosSize + 1] = 0x00; // Ah + Ah (Successive approximation bit position high + low)
this.outputStream.Write(this.buffer, 0, sosSize + 2);
ref byte emitBufferBase = ref MemoryMarshal.GetReference<byte>(this.emitBuffer);
if (this.colorType == JpegColorType.Luminance)
{
this.EncodeGrayscale(image, cancellationToken, ref emitBufferBase);
}
else
{
switch (this.subsample)
{
case JpegSubsample.Ratio444:
this.Encode444(image, cancellationToken, ref emitBufferBase);
break;
case JpegSubsample.Ratio420:
this.Encode420(image, cancellationToken, ref emitBufferBase);
break;
}
}
// Pad the last byte with 1's.
this.Emit(0x7f, 7, ref emitBufferBase);
}
/// <summary>
/// Encodes the image with subsampling. The Cb and Cr components are each subsampled
/// at a factor of 2 both horizontally and vertically.
/// Writes the EndOfImage marker.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="pixels">The pixel accessor providing access to the image pixels.</param>
/// <param name="cancellationToken">The token to monitor for cancellation.</param>
/// <param name="emitBufferBase">The reference to the emit buffer.</param>
private void Encode420<TPixel>(Image<TPixel> pixels, CancellationToken cancellationToken, ref byte emitBufferBase)
where TPixel : unmanaged, IPixel<TPixel>
private void WriteEndOfImageMarker()
{
// TODO: Need a JpegScanEncoder<TPixel> class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.)
Block8x8F b = default;
Span<Block8x8F> cb = stackalloc Block8x8F[4];
Span<Block8x8F> cr = stackalloc Block8x8F[4];
Block8x8F temp1 = default;
Block8x8F temp2 = default;
Block8x8F onStackLuminanceQuantTable = this.luminanceQuantTable;
Block8x8F onStackChrominanceQuantTable = this.chrominanceQuantTable;
var unzig = ZigZag.CreateUnzigTable();
var pixelConverter = YCbCrForwardConverter<TPixel>.Create();
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
ImageFrame<TPixel> frame = pixels.Frames.RootFrame;
Buffer2D<TPixel> pixelBuffer = frame.PixelBuffer;
RowOctet<TPixel> currentRows = default;
for (int y = 0; y < pixels.Height; y += 16)
{
cancellationToken.ThrowIfCancellationRequested();
for (int x = 0; x < pixels.Width; x += 16)
{
for (int i = 0; i < 4; i++)
{
int xOff = (i & 1) * 8;
int yOff = (i & 2) * 4;
currentRows.Update(pixelBuffer, y + yOff);
pixelConverter.Convert(frame, x + xOff, y + yOff, ref currentRows);
cb[i] = pixelConverter.Cb;
cr[i] = pixelConverter.Cr;
prevDCY = this.WriteBlock(
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.Y,
ref temp1,
ref temp2,
ref onStackLuminanceQuantTable,
ref unzig,
ref emitBufferBase);
}
Block8x8F.Scale16X16To8X8(ref b, cb);
prevDCCb = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCb,
ref b,
ref temp1,
ref temp2,
ref onStackChrominanceQuantTable,
ref unzig,
ref emitBufferBase);
Block8x8F.Scale16X16To8X8(ref b, cr);
prevDCCr = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCr,
ref b,
ref temp1,
ref temp2,
ref onStackChrominanceQuantTable,
ref unzig,
ref emitBufferBase);
}
}
this.buffer[0] = JpegConstants.Markers.XFF;
this.buffer[1] = JpegConstants.Markers.EOI;
this.outputStream.Write(this.buffer, 0, 2);
}
/// <summary>
@ -1145,5 +688,34 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
this.buffer[3] = (byte)(length & 0xff);
this.outputStream.Write(this.buffer, 0, 4);
}
/// <summary>
/// Populates a quantization table from one of the two unscaled base tables.
/// </summary>
/// <param name="i">The quantization index; 0 selects the luminance base table, otherwise the chrominance one.</param>
/// <param name="scale">The scaling factor, applied as a rounded percentage of each base value.</param>
/// <param name="quant">The quantization table that receives the scaled values.</param>
private static void InitQuantizationTable(int i, int scale, ref Block8x8F quant)
{
    DebugGuard.MustBeBetweenOrEqualTo(i, 0, 1, nameof(i));

    ReadOnlySpan<byte> baseTable;
    if (i == 0)
    {
        baseTable = UnscaledQuant_Luminance;
    }
    else
    {
        baseTable = UnscaledQuant_Chrominance;
    }

    for (int index = 0; index < Block8x8F.Size; index++)
    {
        // Integer percentage scaling with round-to-nearest (+50 before dividing by 100).
        int scaled = ((baseTable[index] * scale) + 50) / 100;

        // Every entry is kept inside the [1, 255] range.
        int clamped = scaled < 1 ? 1 : (scaled > 255 ? 255 : scaled);

        quant[index] = clamped;
    }
}
}
}

3
src/ImageSharp/Formats/Jpeg/JpegThrowHelper.cs

@ -46,5 +46,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
/// <summary>
/// Throws an <see cref="InvalidImageContentException"/> reporting the given image dimensions as invalid.
/// </summary>
/// <param name="width">The width included in the exception message.</param>
/// <param name="height">The height included in the exception message.</param>
[MethodImpl(InliningOptions.ColdPath)]
public static void ThrowInvalidImageDimensions(int width, int height)
{
    string message = $"Invalid image dimensions: {width}x{height}.";
    throw new InvalidImageContentException(message);
}
/// <summary>
/// Throws an <see cref="ImageFormatException"/> reporting that the given dimensions are too large to encode as JPEG.
/// </summary>
/// <param name="width">The width included in the exception message.</param>
/// <param name="height">The height included in the exception message.</param>
[MethodImpl(InliningOptions.ColdPath)]
public static void ThrowDimensionsTooLarge(int width, int height)
{
    string message = $"Image is too large to encode at {width}x{height} for JPEG format.";
    throw new ImageFormatException(message);
}
}
}

4
src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs

@ -130,9 +130,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Transforms
int radius = (int)TolerantMath.Ceiling(scale * sampler.Radius);
// 'ratio' is a rational number.
// Multiplying it by LCM(sourceSize, destSize)/sourceSize will result in a whole number "again".
// Multiplying it by destSize/GCD(sourceSize, destSize) will result in a whole number "again".
// This value is determining the length of the periods in repeating kernel map rows.
int period = Numerics.LeastCommonMultiple(sourceSize, destinationSize) / sourceSize;
int period = destinationSize / Numerics.GreatestCommonDivisor(sourceSize, destinationSize);
// the center position at i == 0:
double center0 = (ratio - 1) * 0.5;

38
tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Scale16X16To8X8.cs

@ -1,38 +0,0 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
namespace SixLabors.ImageSharp.Benchmarks.Format.Jpeg.Components
{
    /// <summary>
    /// Benchmarks <see cref="Block8x8F.Scale16X16To8X8"/>, which is fed four 8x8 blocks
    /// and writes its result into a single 8x8 block.
    /// </summary>
    [Config(typeof(Config.HwIntrinsics_SSE_AVX))]
    public class Block8x8F_Scale16X16To8X8
    {
        // The four input blocks handed to the scaling routine.
        private readonly Block8x8F[] quadrants = new Block8x8F[4];

        // The block that receives the scaled output (passed by ref).
        private Block8x8F destination;

        /// <summary>
        /// Fills every block with the same set of random values in [0, 1).
        /// </summary>
        [GlobalSetup]
        public void Setup()
        {
            var rng = new Random();
            float[] values = new float[8 * 8];
            for (int k = 0; k < values.Length; k++)
            {
                values[k] = (float)rng.NextDouble();
            }

            for (int q = 0; q < 4; q++)
            {
                this.quadrants[q] = Block8x8F.Load(values);
            }

            this.destination = Block8x8F.Load(values);
        }

        /// <summary>
        /// The measured operation: scales the four input blocks into the destination block.
        /// </summary>
        [Benchmark]
        public void Scale16X16To8X8()
        {
            Block8x8F.Scale16X16To8X8(ref this.destination, this.quadrants);
        }
    }
}

91
tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs

@ -4,6 +4,7 @@
using System.Drawing.Imaging;
using System.IO;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Tests;
using SDImage = System.Drawing.Image;
@ -12,10 +13,22 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
public class EncodeJpeg
{
// System.Drawing needs this.
private Stream bmpStream;
[Params(75, 90, 100)]
public int Quality;
private const string TestImage = TestImages.Jpeg.BenchmarkSuite.Jpeg420Exif_MidSizeYCbCr;
// System.Drawing
private SDImage bmpDrawing;
private Stream bmpStream;
private ImageCodecInfo jpegCodec;
private EncoderParameters encoderParameters;
// ImageSharp
private Image<Rgba32> bmpCore;
private JpegEncoder encoder420;
private JpegEncoder encoder444;
private MemoryStream destinationStream;
[GlobalSetup]
@ -23,12 +36,20 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
if (this.bmpStream == null)
{
const string TestImage = TestImages.Jpeg.BenchmarkSuite.Jpeg420Exif_MidSizeYCbCr;
this.bmpStream = File.OpenRead(Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, TestImage));
this.bmpCore = Image.Load<Rgba32>(this.bmpStream);
this.bmpCore.Metadata.ExifProfile = null;
this.encoder420 = new JpegEncoder { Quality = this.Quality, Subsample = JpegSubsample.Ratio420 };
this.encoder444 = new JpegEncoder { Quality = this.Quality, Subsample = JpegSubsample.Ratio444 };
this.bmpStream.Position = 0;
this.bmpDrawing = SDImage.FromStream(this.bmpStream);
this.jpegCodec = GetEncoder(ImageFormat.Jpeg);
this.encoderParameters = new EncoderParameters(1);
// Quality cast to long is necessary
this.encoderParameters.Param[0] = new EncoderParameter(Encoder.Quality, (long)this.Quality);
this.destinationStream = new MemoryStream();
}
}
@ -38,36 +59,72 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
{
this.bmpStream.Dispose();
this.bmpStream = null;
this.destinationStream.Dispose();
this.destinationStream = null;
this.bmpCore.Dispose();
this.bmpDrawing.Dispose();
this.encoderParameters.Dispose();
}
[Benchmark(Baseline = true, Description = "System.Drawing Jpeg")]
[Benchmark(Baseline = true, Description = "System.Drawing Jpeg 4:2:0")]
public void JpegSystemDrawing()
{
this.bmpDrawing.Save(this.destinationStream, ImageFormat.Jpeg);
this.bmpDrawing.Save(this.destinationStream, this.jpegCodec, this.encoderParameters);
this.destinationStream.Seek(0, SeekOrigin.Begin);
}
[Benchmark(Description = "ImageSharp Jpeg 4:2:0")]
public void JpegCore420()
{
this.bmpCore.SaveAsJpeg(this.destinationStream, this.encoder420);
this.destinationStream.Seek(0, SeekOrigin.Begin);
}
[Benchmark(Description = "ImageSharp Jpeg")]
public void JpegCore()
[Benchmark(Description = "ImageSharp Jpeg 4:4:4")]
public void JpegCore444()
{
this.bmpCore.SaveAsJpeg(this.destinationStream);
this.bmpCore.SaveAsJpeg(this.destinationStream, this.encoder444);
this.destinationStream.Seek(0, SeekOrigin.Begin);
}
// https://docs.microsoft.com/en-us/dotnet/api/system.drawing.imaging.encoderparameter?redirectedfrom=MSDN&view=net-5.0
/// <summary>
/// Looks up the installed GDI+ image encoder whose format id matches the given image format.
/// </summary>
/// <param name="format">The image format to find an encoder for.</param>
/// <returns>The matching <see cref="ImageCodecInfo"/>, or <c>null</c> when no encoder is registered for the format.</returns>
private static ImageCodecInfo GetEncoder(ImageFormat format)
{
    // The result is passed to Image.Save, so the encoder list must be searched.
    // The decoder list is a separate enumeration and is not guaranteed to contain the same codecs.
    foreach (ImageCodecInfo codec in ImageCodecInfo.GetImageEncoders())
    {
        if (codec.FormatID == format.Guid)
        {
            return codec;
        }
    }

    return null;
}
}
}
/*
BenchmarkDotNet=v0.12.1, OS=Windows 10.0.18363.959 (1909/November2018Update/19H2)
Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
.NET Core SDK=3.1.302
[Host] : .NET Core 3.1.6 (CoreCLR 4.700.20.26901, CoreFX 4.700.20.31603), X64 RyuJIT
DefaultJob : .NET Core 3.1.6 (CoreCLR 4.700.20.26901, CoreFX 4.700.20.31603), X64 RyuJIT
BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19042
Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
.NET Core SDK=6.0.100-preview.3.21202.5
[Host] : .NET Core 3.1.13 (CoreCLR 4.700.21.11102, CoreFX 4.700.21.11602), X64 RyuJIT [AttachedDebugger]
DefaultJob : .NET Core 3.1.13 (CoreCLR 4.700.21.11102, CoreFX 4.700.21.11602), X64 RyuJIT
| Method | Mean | Error | StdDev | Ratio | RatioSD |
|---------------------- |---------:|----------:|----------:|------:|--------:|
| 'System.Drawing Jpeg' | 4.297 ms | 0.0244 ms | 0.0228 ms | 1.00 | 0.00 |
| 'ImageSharp Jpeg' | 5.286 ms | 0.1034 ms | 0.0967 ms | 1.23 | 0.02 |
| Method | Quality | Mean | Error | StdDev | Ratio | RatioSD |
|---------------------------- |-------- |---------:|---------:|---------:|------:|--------:|
| 'System.Drawing Jpeg 4:2:0' | 75 | 30.60 ms | 0.496 ms | 0.464 ms | 1.00 | 0.00 |
| 'ImageSharp Jpeg 4:2:0' | 75 | 29.86 ms | 0.350 ms | 0.311 ms | 0.98 | 0.02 |
| 'ImageSharp Jpeg 4:4:4' | 75 | 45.36 ms | 0.899 ms | 1.036 ms | 1.48 | 0.05 |
| | | | | | | |
| 'System.Drawing Jpeg 4:2:0' | 90 | 34.05 ms | 0.669 ms | 0.687 ms | 1.00 | 0.00 |
| 'ImageSharp Jpeg 4:2:0' | 90 | 37.26 ms | 0.706 ms | 0.660 ms | 1.10 | 0.03 |
| 'ImageSharp Jpeg 4:4:4' | 90 | 52.54 ms | 0.579 ms | 0.514 ms | 1.55 | 0.04 |
| | | | | | | |
| 'System.Drawing Jpeg 4:2:0' | 100 | 39.36 ms | 0.267 ms | 0.237 ms | 1.00 | 0.00 |
| 'ImageSharp Jpeg 4:2:0' | 100 | 42.44 ms | 0.410 ms | 0.383 ms | 1.08 | 0.01 |
| 'ImageSharp Jpeg 4:4:4' | 100 | 70.88 ms | 0.508 ms | 0.450 ms | 1.80 | 0.02 |
*/

4
tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs

@ -37,7 +37,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Format.Jpeg.Components.Encoder
Block8x8F cb = default;
Block8x8F cr = default;
this.converter.Convert(this.data.AsSpan(), ref y, ref cb, ref cr);
this.converter.Convert444(this.data.AsSpan(), ref y, ref cb, ref cr);
}
[Benchmark]
@ -49,7 +49,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Format.Jpeg.Components.Encoder
if (RgbToYCbCrConverterVectorized.IsSupported)
{
RgbToYCbCrConverterVectorized.Convert(this.data.AsSpan(), ref y, ref cb, ref cr);
RgbToYCbCrConverterVectorized.Convert444(this.data.AsSpan(), ref y, ref cb, ref cr);
}
}
}

264
tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs

@ -2,10 +2,12 @@
// Licensed under the Apache License, Version 2.0.
using System;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics.X86;
#endif
using SixLabors.ImageSharp.Formats.Jpeg.Components;
using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
using Xunit.Abstractions;
@ -22,94 +24,180 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{
}
[Fact]
public void IDCT2D8x4_LeftPart()
// Reference tests
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public void LLM_TransformIDCT_CompareToNonOptimized(int seed)
{
float[] sourceArray = Create8x8FloatData();
var expectedDestArray = new float[64];
float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
var source = Block8x8F.Load(sourceArray);
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(sourceArray, expectedDestArray);
Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref source);
var temp = default(Block8x8F);
var actual = default(Block8x8F);
FastFloatingPointDCT.TransformIDCT(ref source, ref actual, ref temp);
var source = default(Block8x8F);
source.LoadFrom(sourceArray);
this.CompareBlocks(expected, actual, 1f);
}
var dest = default(Block8x8F);
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public void LLM_TransformIDCT_CompareToAccurate(int seed)
{
float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
FastFloatingPointDCT.IDCT8x4_LeftPart(ref source, ref dest);
var source = Block8x8F.Load(sourceArray);
var actualDestArray = new float[64];
dest.ScaledCopyTo(actualDestArray);
Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref source);
this.Print8x8Data(expectedDestArray);
this.Output.WriteLine("**************");
this.Print8x8Data(actualDestArray);
var temp = default(Block8x8F);
var actual = default(Block8x8F);
FastFloatingPointDCT.TransformIDCT(ref source, ref actual, ref temp);
Assert.Equal(expectedDestArray, actualDestArray);
this.CompareBlocks(expected, actual, 1f);
}
[Fact]
public void IDCT2D8x4_RightPart()
// Inverse transform
[Theory]
[InlineData(1)]
[InlineData(2)]
public void IDCT8x4_LeftPart(int seed)
{
float[] sourceArray = Create8x8FloatData();
var expectedDestArray = new float[64];
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(sourceArray.AsSpan(4), expectedDestArray.AsSpan(4));
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
var source = default(Block8x8F);
source.LoadFrom(sourceArray);
var destBlock = default(Block8x8F);
var dest = default(Block8x8F);
var expectedDest = new float[64];
FastFloatingPointDCT.IDCT8x4_RightPart(ref source, ref dest);
// reference
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest);
var actualDestArray = new float[64];
dest.ScaledCopyTo(actualDestArray);
// testee
FastFloatingPointDCT.IDCT8x4_LeftPart(ref srcBlock, ref destBlock);
this.Print8x8Data(expectedDestArray);
this.Output.WriteLine("**************");
this.Print8x8Data(actualDestArray);
var actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
Assert.Equal(expectedDestArray, actualDestArray);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public void LLM_TransformIDCT_CompareToNonOptimized(int seed)
public void IDCT8x4_RightPart(int seed)
{
float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
var source = Block8x8F.Load(sourceArray);
var destBlock = default(Block8x8F);
Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref source);
var expectedDest = new float[64];
var temp = default(Block8x8F);
var actual = default(Block8x8F);
FastFloatingPointDCT.TransformIDCT(ref source, ref actual, ref temp);
// reference
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
this.CompareBlocks(expected, actual, 1f);
// testee
FastFloatingPointDCT.IDCT8x4_RightPart(ref srcBlock, ref destBlock);
var actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public void LLM_TransformIDCT_CompareToAccurate(int seed)
public void IDCT8x8_Avx(int seed)
{
float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
#if SUPPORTS_RUNTIME_INTRINSICS
var skip = !Avx.IsSupported;
#else
var skip = true;
#endif
if (skip)
{
this.Output.WriteLine("No AVX present, skipping test!");
return;
}
var source = Block8x8F.Load(sourceArray);
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref source);
var destBlock = default(Block8x8F);
var temp = default(Block8x8F);
var actual = default(Block8x8F);
FastFloatingPointDCT.TransformIDCT(ref source, ref actual, ref temp);
var expectedDest = new float[64];
this.CompareBlocks(expected, actual, 1f);
// reference, left part
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest);
// reference, right part
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
// testee, whole 8x8
FastFloatingPointDCT.IDCT8x8_Avx(ref srcBlock, ref destBlock);
var actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
/// <summary>
/// Checks <see cref="FastFloatingPointDCT.TransformIDCT"/> against the LLM reference implementation
/// on each hardware-intrinsics configuration listed at the bottom of the method.
/// </summary>
/// <param name="seed">Seed for the random input block.</param>
[Theory]
[InlineData(1)]
[InlineData(2)]
public void TransformIDCT(int seed)
{
    static void RunTest(string serialized)
    {
        int seed = FeatureTestRunner.Deserialize<int>(serialized);

        Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
        var srcBlock = default(Block8x8F);
        srcBlock.LoadFrom(src);

        var destBlock = default(Block8x8F);

        var expectedDest = new float[64];
        var temp1 = new float[64];
        var temp2 = default(Block8x8F);

        // reference
        ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp1);

        // testee
        FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref destBlock, ref temp2);

        var actualDest = new float[64];
        destBlock.ScaledCopyTo(actualDest);

        // xUnit expects (expected, actual); the correct order keeps failure messages labelled properly.
        Assert.Equal(expectedDest, actualDest, new ApproximateFloatComparer(1f));
    }

    // 3 paths:
    // 1. AllowAll - call avx/fma implementation
    // 2. DisableFMA - call avx implementation without fma acceleration
    // 3. DisableAvx - call fallback code of Vector4 implementation
    //
    // DisableSSE isn't needed because fallback Vector4 code will compile to either sse or fallback code with same result
    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunTest,
        seed,
        HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX);
}
// Forward transform
[Theory]
[InlineData(1)]
[InlineData(2)]
@ -123,7 +211,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
var expectedDest = new float[64];
// reference
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D8x4_32f(src, expectedDest);
// testee
FastFloatingPointDCT.FDCT8x4_LeftPart(ref srcBlock, ref destBlock);
var actualDest = new float[64];
@ -145,7 +236,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
var expectedDest = new float[64];
// reference
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
// testee
FastFloatingPointDCT.FDCT8x4_RightPart(ref srcBlock, ref destBlock);
var actualDest = new float[64];
@ -157,8 +251,19 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[Theory]
[InlineData(1)]
[InlineData(2)]
public void TransformFDCT(int seed)
public void FDCT8x8_Avx(int seed)
{
#if SUPPORTS_RUNTIME_INTRINSICS
var skip = !Avx.IsSupported;
#else
var skip = true;
#endif
if (skip)
{
this.Output.WriteLine("No AVX present, skipping test!");
return;
}
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
@ -166,17 +271,64 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
var destBlock = default(Block8x8F);
var expectedDest = new float[64];
var temp1 = new float[64];
var temp2 = default(Block8x8F);
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true);
FastFloatingPointDCT.TransformFDCT(ref srcBlock, ref destBlock, ref temp2, false);
// reference, left part
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D8x4_32f(src, expectedDest);
// reference, right part
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
// testee, whole 8x8
FastFloatingPointDCT.FDCT8x8_Avx(ref srcBlock, ref destBlock);
var actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
/// <summary>
/// Checks <see cref="FastFloatingPointDCT.TransformFDCT"/> against the LLM reference implementation
/// on each hardware-intrinsics configuration listed at the bottom of the method.
/// </summary>
/// <param name="seed">Seed for the random input block.</param>
[Theory]
[InlineData(1)]
[InlineData(2)]
public void TransformFDCT(int seed)
{
    static void RunTest(string serialized)
    {
        int seed = FeatureTestRunner.Deserialize<int>(serialized);

        Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
        var srcBlock = default(Block8x8F);
        srcBlock.LoadFrom(src);

        var destBlock = default(Block8x8F);

        var expectedDest = new float[64];
        var temp1 = new float[64];
        var temp2 = default(Block8x8F);

        // reference
        ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true);

        // testee
        FastFloatingPointDCT.TransformFDCT(ref srcBlock, ref destBlock, ref temp2, false);

        var actualDest = new float[64];
        destBlock.ScaledCopyTo(actualDest);

        // xUnit expects (expected, actual); the correct order keeps failure messages labelled properly.
        Assert.Equal(expectedDest, actualDest, new ApproximateFloatComparer(1f));
    }

    // 3 paths:
    // 1. AllowAll - call avx/fma implementation
    // 2. DisableFMA - call avx implementation without fma acceleration
    // 3. DisableAvx - call fallback code of Vector4 implementation
    //
    // DisableSSE isn't needed because fallback Vector4 code will compile to either sse or fallback code with same result
    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunTest,
        seed,
        HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX);
}
}
}
}

214
tests/ImageSharp.Tests/Formats/Jpg/RgbToYCbCrConverterTests.cs

@ -1,7 +1,13 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using SixLabors.ImageSharp.ColorSpaces;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
using SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder;
@ -23,22 +29,23 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
private ITestOutputHelper Output { get; }
[Fact]
public void TestLutConverter()
public void TestConverterLut444()
{
Rgb24[] data = CreateTestData();
int dataSize = 8 * 8;
Rgb24[] data = CreateTestData(dataSize);
var target = RgbToYCbCrConverterLut.Create();
Block8x8F y = default;
Block8x8F cb = default;
Block8x8F cr = default;
target.Convert(data.AsSpan(), ref y, ref cb, ref cr);
target.Convert444(data.AsSpan(), ref y, ref cb, ref cr);
Verify(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(1F));
Verify444(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(1F));
}
[Fact]
public void TestVectorizedConverter()
public void TestConverterVectorized444()
{
if (!RgbToYCbCrConverterVectorized.IsSupported)
{
@ -46,18 +53,187 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
return;
}
Rgb24[] data = CreateTestData();
int dataSize = 8 * 8;
Rgb24[] data = CreateTestData(dataSize);
Block8x8F y = default;
Block8x8F cb = default;
Block8x8F cr = default;
RgbToYCbCrConverterVectorized.Convert(data.AsSpan(), ref y, ref cb, ref cr);
RgbToYCbCrConverterVectorized.Convert444(data.AsSpan(), ref y, ref cb, ref cr);
Verify(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(0.0001F));
Verify444(data, ref y, ref cb, ref cr, new ApproximateColorSpaceComparer(0.0001F));
}
private static void Verify(ReadOnlySpan<Rgb24> data, ref Block8x8F yResult, ref Block8x8F cbResult, ref Block8x8F crResult, ApproximateColorSpaceComparer comparer)
/// <summary>
/// Runs the LUT converter's 4:2:0 path over a 16x16 pixel region, in two calls of eight rows each,
/// and checks the resulting blocks with <c>Verify420</c>.
/// </summary>
[Fact]
public void TestConverterLut420()
{
    const int Side = 16;
    Span<Rgb24> pixels = CreateTestData(Side * Side).AsSpan();

    var converter = RgbToYCbCrConverterLut.Create();

    var luma = new Block8x8F[4];
    Block8x8F chromaBlue = default;
    Block8x8F chromaRed = default;

    // First call: rows 0-7, written to luma blocks 0 and 1.
    converter.Convert420(pixels, ref luma[0], ref luma[1], ref chromaBlue, ref chromaRed, 0);

    // Second call: rows 8-15, written to luma blocks 2 and 3.
    Span<Rgb24> lowerHalf = pixels.Slice(Side * 8);
    converter.Convert420(lowerHalf, ref luma[2], ref luma[3], ref chromaBlue, ref chromaRed, 1);

    Verify420(pixels, luma, ref chromaBlue, ref chromaRed, new ApproximateFloatComparer(1F));
}
/// <summary>
/// Runs the vectorized converter's 4:2:0 path over a 16x16 pixel region, in two calls of eight rows each,
/// and checks the resulting blocks with <c>Verify420</c>. Returns early when the converter is unsupported.
/// </summary>
[Fact]
public void TestConverterVectorized420()
{
    bool supported = RgbToYCbCrConverterVectorized.IsSupported;
    if (supported == false)
    {
        this.Output.WriteLine("No AVX and/or FMA present, skipping test!");
        return;
    }

    const int Side = 16;
    Span<Rgb24> pixels = CreateTestData(Side * Side).AsSpan();

    var luma = new Block8x8F[4];
    Block8x8F chromaBlue = default;
    Block8x8F chromaRed = default;

    // First call: rows 0-7, written to luma blocks 0 and 1.
    RgbToYCbCrConverterVectorized.Convert420(pixels, ref luma[0], ref luma[1], ref chromaBlue, ref chromaRed, 0);

    // Second call: rows 8-15, written to luma blocks 2 and 3.
    Span<Rgb24> lowerHalf = pixels.Slice(Side * 8);
    RgbToYCbCrConverterVectorized.Convert420(lowerHalf, ref luma[2], ref luma[3], ref chromaBlue, ref chromaRed, 1);

    Verify420(pixels, luma, ref chromaBlue, ref chromaRed, new ApproximateFloatComparer(1F));
}
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// Compares each lane returned by <c>RgbToYCbCrConverterVectorized.Scale16x2_8x1</c> with a scalar
/// average of four input values. Does nothing when AVX2 is unavailable.
/// </summary>
/// <param name="seed">Seed for the random input values.</param>
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public void Scale16x2_8x1(int seed)
{
    if (Avx2.IsSupported == false)
    {
        return;
    }

    int lanes = Vector256<float>.Count;
    Span<float> input = new Random(seed).GenerateRandomFloatArray(lanes * 4, -1000, 1000);

    // Act:
    Span<Vector256<float>> inputVectors = MemoryMarshal.Cast<float, Vector256<float>>(input);
    Vector256<float> scaled = RgbToYCbCrConverterVectorized.Scale16x2_8x1(inputVectors);
    ref float firstLane = ref Unsafe.As<Vector256<float>, float>(ref scaled);

    // Assert:
    // Comparison epsilon is tricky but 10^(-4) is good enough (?)
    var comparer = new ApproximateFloatComparer(0.0001f);
    for (int lane = 0; lane < lanes; lane++)
    {
        float actual = Unsafe.Add(ref firstLane, lane);
        float expected = ScalarAverage(input, lane);
        bool close = comparer.Equals(actual, expected);
        Assert.True(close, $"Pos {lane}, Expected: {expected}, Actual: {actual}");
    }

    // Averages two adjacent values starting at index * 2 and two adjacent values starting at (index + 8) * 2.
    static float ScalarAverage(Span<float> values, int index)
    {
        int upper = index * 2;
        int lower = (index + 8) * 2;
        return 0.25f * (values[upper] + values[upper + 1] + values[lower] + values[lower + 1]);
    }
}
#endif
/// <summary>
/// Recomputes Y, Cb and Cr from <paramref name="data"/> with the scalar <c>RgbToYCbCr</c> helper
/// and asserts that every position matches the supplied result blocks within the comparer's tolerance.
/// </summary>
/// <param name="data">The source pixels.</param>
/// <param name="yResult">The Y block under test.</param>
/// <param name="cbResult">The Cb block under test.</param>
/// <param name="crResult">The Cr block under test.</param>
/// <param name="comparer">The comparer deciding whether two colors are close enough.</param>
private static void Verify444(
    ReadOnlySpan<Rgb24> data,
    ref Block8x8F yResult,
    ref Block8x8F cbResult,
    ref Block8x8F crResult,
    ApproximateColorSpaceComparer comparer)
{
    var refY = default(Block8x8F);
    var refCb = default(Block8x8F);
    var refCr = default(Block8x8F);

    RgbToYCbCr(data, ref refY, ref refCb, ref refCr);

    for (int pos = 0; pos < Block8x8F.Size; pos++)
    {
        var expected = new YCbCr(refY[pos], refCb[pos], refCr[pos]);
        var actual = new YCbCr(yResult[pos], cbResult[pos], crResult[pos]);
        bool matches = comparer.Equals(expected, actual);

        Assert.True(
            matches,
            $"Pos {pos}, Expected {refY[pos]} == {yResult[pos]}, {refCb[pos]} == {cbResult[pos]}, {refCr[pos]} == {crResult[pos]}");
    }
}
/// <summary>
/// Verifies a 4:2:0 conversion of a 16x16 pixel region: each of the four luminance blocks is compared
/// with a scalar conversion of its 8x8 quadrant, and the chroma blocks are compared with the
/// downscaled per-quadrant chroma values.
/// </summary>
/// <param name="data">The 16x16 source pixels, stored row by row.</param>
/// <param name="yResult">The four Y blocks under test (top-left, top-right, bottom-left, bottom-right).</param>
/// <param name="cbResult">The Cb block under test.</param>
/// <param name="crResult">The Cr block under test.</param>
/// <param name="comparer">The comparer deciding whether two values are close enough.</param>
private static void Verify420(
    ReadOnlySpan<Rgb24> data,
    Block8x8F[] yResult,
    ref Block8x8F cbResult,
    ref Block8x8F crResult,
    ApproximateFloatComparer comparer)
{
    Block8x8F reference = default;
    var referenceCb = new Block8x8F[4];
    var referenceCr = new Block8x8F[4];
    Span<Rgb24> quadrantPixels = new Rgb24[8 * 8].AsSpan();

    // Quadrants in order: top left, top right, bottom left, bottom right.
    for (int q = 0; q < 4; q++)
    {
        // Bit 1 of q moves down eight 16-pixel rows, bit 0 moves right eight pixels.
        int start = ((q >> 1) * 8 * 16) + ((q & 1) * 8);

        ExtractQuadrant(data.Slice(start), quadrantPixels);
        RgbToYCbCr(quadrantPixels, ref reference, ref referenceCb[q], ref referenceCr[q]);
        VerifyBlock(ref yResult[q], ref reference, comparer);
    }

    // verify Cb
    Downscale(ref reference, referenceCb);
    VerifyBlock(ref cbResult, ref reference, comparer);

    // verify Cr
    Downscale(ref reference, referenceCr);
    VerifyBlock(ref crResult, ref reference, comparer);

    // Copies eight rows of eight pixels out of a region whose rows are 16 pixels wide.
    static void ExtractQuadrant(ReadOnlySpan<Rgb24> source, Span<Rgb24> dest)
    {
        for (int row = 0; row < 8; row++)
        {
            ReadOnlySpan<Rgb24> rowPixels = source.Slice(row * 16, 8);
            rowPixels.CopyTo(dest.Slice(row * 8));
        }
    }

    // Scales four 8x8 blocks into one 8x8 block, used in chroma subsampling tests.
    static void Downscale(ref Block8x8F dest, ReadOnlySpan<Block8x8F> source)
    {
        for (int block = 0; block < 4; block++)
        {
            // Destination offset of this block's 4x4 output region.
            int destOffset = ((block & 2) << 4) | ((block & 1) << 2);
            Block8x8F current = source[block];

            for (int row = 0; row < 4; row++)
            {
                for (int col = 0; col < 4; col++)
                {
                    int topLeft = (16 * row) + (2 * col);
                    float sum = current[topLeft] + current[topLeft + 1] + current[topLeft + 8] + current[topLeft + 9];
                    dest[(8 * row) + col + destOffset] = (sum + 2) * .25F;
                }
            }
        }
    }
}
private static void RgbToYCbCr(ReadOnlySpan<Rgb24> data, ref Block8x8F y, ref Block8x8F cb, ref Block8x8F cr)
{
for (int i = 0; i < data.Length; i++)
{
@ -65,17 +241,23 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
int g = data[i].G;
int b = data[i].B;
float y = (0.299F * r) + (0.587F * g) + (0.114F * b);
float cb = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b));
float cr = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b));
y[i] = (0.299F * r) + (0.587F * g) + (0.114F * b);
cb[i] = 128F + ((-0.168736F * r) - (0.331264F * g) + (0.5F * b));
cr[i] = 128F + ((0.5F * r) - (0.418688F * g) - (0.081312F * b));
}
}
Assert.True(comparer.Equals(new YCbCr(y, cb, cr), new YCbCr(yResult[i], cbResult[i], crResult[i])), $"Pos {i}, Expected {y} == {yResult[i]}, {cb} == {cbResult[i]}, {cr} == {crResult[i]}");
/// <summary>
/// Asserts that two blocks match at every position within the comparer's tolerance.
/// </summary>
/// <param name="res">The block under test.</param>
/// <param name="target">The block holding the expected values.</param>
/// <param name="comparer">The comparer deciding whether two values are close enough.</param>
private static void VerifyBlock(ref Block8x8F res, ref Block8x8F target, ApproximateFloatComparer comparer)
{
    int pos = 0;
    while (pos < Block8x8F.Size)
    {
        bool close = comparer.Equals(res[pos], target[pos]);
        Assert.True(close, $"Pos {pos}, Expected: {target[pos]}, Got: {res[pos]}");
        pos++;
    }
}
private static Rgb24[] CreateTestData()
private static Rgb24[] CreateTestData(int size)
{
var data = new Rgb24[64];
var data = new Rgb24[size];
var r = new Random();
var random = new byte[3];

3
tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeKernelMapTests.cs

@ -80,6 +80,9 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Transforms
{ KnownResamplers.Bicubic, 1680, 1200 },
{ KnownResamplers.Box, 13, 299 },
{ KnownResamplers.Lanczos5, 3032, 600 },
// Large number. https://github.com/SixLabors/ImageSharp/issues/1616
{ KnownResamplers.Bicubic, 207773, 51943 }
};
public static TheoryData<string, int, int> GeneratedImageResizeData =

Loading…
Cancel
Save