Browse Source

Merge pull request #1761 from br3aker/jpeg-encoder-optimization

Jpeg encoder optimization
pull/1771/head
Anton Firszov 4 years ago
committed by GitHub
parent
commit
2f903c7c9b
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 193
      src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs
  2. 149
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs
  3. 2
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopyTo.cs
  4. 435
      src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs
  5. 19
      src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs
  6. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs
  7. 26
      src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
  8. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs
  9. 21
      src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs
  10. 512
      src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs
  11. 161
      src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs
  12. 561
      src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
  13. 67
      src/ImageSharp/Formats/Jpeg/Components/Quantization.cs
  14. 300
      src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs
  15. 79
      src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs
  16. 10
      src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs
  17. 51
      src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs
  18. 11
      src/ImageSharp/Formats/Tiff/Compression/Decompressors/JpegTiffCompression.cs
  19. 2
      src/ImageSharp/Formats/Tiff/Compression/Decompressors/RgbJpegSpectralConverter.cs
  20. 50
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Quantize.cs
  21. 33
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs
  22. 26
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs
  23. 10
      tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs
  24. 11
      tests/ImageSharp.Benchmarks/Program.cs
  25. 3
      tests/ImageSharp.Tests.ProfilingSandbox/Program.cs
  26. 135
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
  27. 195
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs
  28. 163
      tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
  29. 152
      tests/ImageSharp.Tests/Formats/Jpg/HuffmanScanEncoderTests.cs
  30. 8
      tests/ImageSharp.Tests/Formats/Jpg/QuantizationTests.cs
  31. 32
      tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs
  32. 2
      tests/ImageSharp.Tests/Formats/Jpg/Utils/LibJpegTools.ComponentData.cs
  33. 54
      tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs
  34. 5
      tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs
  35. 46
      tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs

193
src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs

@ -2,17 +2,22 @@
// Licensed under the Apache License, Version 2.0.
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using System.Text;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
/// <summary>
/// Represents a Jpeg block with <see cref="short"/> coefficients.
/// 8x8 matrix of <see cref="short"/> coefficients.
/// </summary>
// ReSharper disable once InconsistentNaming
[StructLayout(LayoutKind.Explicit)]
internal unsafe struct Block8x8 : IEquatable<Block8x8>
{
/// <summary>
@ -20,24 +25,44 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// </summary>
public const int Size = 64;
#pragma warning disable IDE0051 // Remove unused private member
/// <summary>
/// A fixed size buffer holding the values.
/// See: <see>
/// <cref>https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/unsafe-code-pointers/fixed-size-buffers</cref>
/// </see>
/// A placeholder buffer so the actual struct occupies exactly 64 * 2 bytes.
/// </summary>
/// <remarks>
/// This is not used directly in the code.
/// </remarks>
[FieldOffset(0)]
private fixed short data[Size];
/// <summary>
/// Initializes a new instance of the <see cref="Block8x8"/> struct.
/// </summary>
/// <param name="coefficients">A <see cref="Span{T}"/> of coefficients</param>
public Block8x8(Span<short> coefficients)
{
ref byte selfRef = ref Unsafe.As<Block8x8, byte>(ref this);
ref byte sourceRef = ref Unsafe.As<short, byte>(ref MemoryMarshal.GetReference(coefficients));
Unsafe.CopyBlock(ref selfRef, ref sourceRef, Size * sizeof(short));
}
#pragma warning restore IDE0051
#if SUPPORTS_RUNTIME_INTRINSICS
[FieldOffset(0)]
public Vector128<short> V0;
[FieldOffset(16)]
public Vector128<short> V1;
[FieldOffset(32)]
public Vector128<short> V2;
[FieldOffset(48)]
public Vector128<short> V3;
[FieldOffset(64)]
public Vector128<short> V4;
[FieldOffset(80)]
public Vector128<short> V5;
[FieldOffset(96)]
public Vector128<short> V6;
[FieldOffset(112)]
public Vector128<short> V7;
[FieldOffset(0)]
public Vector256<short> V01;
[FieldOffset(32)]
public Vector256<short> V23;
[FieldOffset(64)]
public Vector256<short> V45;
[FieldOffset(96)]
public Vector256<short> V67;
#endif
/// <summary>
/// Gets or sets a <see cref="short"/> value at the given index
@ -49,7 +74,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
GuardBlockIndex(idx);
DebugGuard.MustBeBetweenOrEqualTo(idx, 0, Size - 1, nameof(idx));
ref short selfRef = ref Unsafe.As<Block8x8, short>(ref this);
return Unsafe.Add(ref selfRef, idx);
}
@ -57,7 +83,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
[MethodImpl(MethodImplOptions.AggressiveInlining)]
set
{
GuardBlockIndex(idx);
DebugGuard.MustBeBetweenOrEqualTo(idx, 0, Size - 1, nameof(idx));
ref short selfRef = ref Unsafe.As<Block8x8, short>(ref this);
Unsafe.Add(ref selfRef, idx) = value;
}
@ -75,15 +102,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
set => this[(y * 8) + x] = value;
}
public static bool operator ==(Block8x8 left, Block8x8 right)
{
return left.Equals(right);
}
public static bool operator ==(Block8x8 left, Block8x8 right) => left.Equals(right);
public static bool operator !=(Block8x8 left, Block8x8 right)
{
return !left.Equals(right);
}
public static bool operator !=(Block8x8 left, Block8x8 right) => !left.Equals(right);
/// <summary>
/// Multiply all elements by a given <see cref="int"/>
@ -149,34 +170,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
return result;
}
/// <summary>
/// Pointer-based "Indexer" (getter part)
/// </summary>
/// <param name="blockPtr">Block pointer</param>
/// <param name="idx">Index</param>
/// <returns>The scaleVec value at the specified index</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short GetScalarAt(Block8x8* blockPtr, int idx)
{
GuardBlockIndex(idx);
short* fp = blockPtr->data;
return fp[idx];
}
/// <summary>
/// Pointer-based "Indexer" (setter part)
/// </summary>
/// <param name="blockPtr">Block pointer</param>
/// <param name="idx">Index</param>
/// <param name="value">Value</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void SetScalarAt(Block8x8* blockPtr, int idx, short value)
public static Block8x8 Load(Span<short> data)
{
GuardBlockIndex(idx);
short* fp = blockPtr->data;
fp[idx] = value;
Unsafe.SkipInit(out Block8x8 result);
result.LoadFrom(data);
return result;
}
/// <summary>
@ -194,7 +192,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// </summary>
public short[] ToArray()
{
var result = new short[Size];
short[] result = new short[Size];
this.CopyTo(result);
return result;
}
@ -206,7 +204,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
ref byte selfRef = ref Unsafe.As<Block8x8, byte>(ref this);
ref byte destRef = ref MemoryMarshal.GetReference(MemoryMarshal.Cast<short, byte>(destination));
Unsafe.CopyBlock(ref destRef, ref selfRef, Size * sizeof(short));
Unsafe.CopyBlockUnaligned(ref destRef, ref selfRef, Size * sizeof(short));
}
/// <summary>
@ -220,6 +218,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
}
/// <summary>
/// Load raw 16bit integers from source.
/// </summary>
/// <remarks>
/// Exactly <see cref="Size"/> values are copied from the beginning of <paramref name="source"/>;
/// any additional elements are ignored. The copy is a raw, unchecked block copy, so
/// <paramref name="source"/> must contain at least <see cref="Size"/> elements.
/// </remarks>
/// <param name="source">Source</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void LoadFrom(Span<short> source)
{
    // The block copy below always reads Size * sizeof(short) bytes and performs no bounds check,
    // so a shorter span would be read past its end. Catch that in debug builds.
    DebugGuard.MustBeGreaterThanOrEqualTo(source.Length, Size, nameof(source));

    ref byte sourceRef = ref Unsafe.As<short, byte>(ref MemoryMarshal.GetReference(source));
    ref byte destRef = ref Unsafe.As<Block8x8, byte>(ref this);

    Unsafe.CopyBlockUnaligned(ref destRef, ref sourceRef, Size * sizeof(short));
}
/// <summary>
/// Cast and copy <see cref="Size"/> <see cref="int"/>-s from the beginning of 'source' span.
/// </summary>
@ -231,13 +242,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
}
[Conditional("DEBUG")]
private static void GuardBlockIndex(int idx)
{
DebugGuard.MustBeLessThan(idx, Size, nameof(idx));
DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx));
}
/// <inheritdoc />
public override string ToString()
{
@ -271,15 +275,66 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
/// <inheritdoc />
public override bool Equals(object obj)
{
return obj is Block8x8 other && this.Equals(other);
}
public override bool Equals(object obj) => obj is Block8x8 other && this.Equals(other);
/// <inheritdoc />
public override int GetHashCode()
public override int GetHashCode() => (this[0] * 31) + this[1];
/// <summary>
/// Returns index of the last non-zero element in given matrix.
/// </summary>
/// <returns>
/// Index of the last non-zero element. Returns -1 if all elements are equal to zero.
/// </returns>
[MethodImpl(InliningOptions.ShortMethod)]
public nint GetLastNonZeroIndex()
{
return (this[0] * 31) + this[1];
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
const int equalityMask = unchecked((int)0b1111_1111_1111_1111_1111_1111_1111_1111);
Vector256<short> zero16 = Vector256<short>.Zero;
ref Vector256<short> mcuStride = ref Unsafe.As<Block8x8, Vector256<short>>(ref this);
for (nint i = 3; i >= 0; i--)
{
int areEqual = Avx2.MoveMask(Avx2.CompareEqual(Unsafe.Add(ref mcuStride, i), zero16).AsByte());
if (areEqual != equalityMask)
{
// Each pair of mask bits holds the comparison result for one 2-byte element of the input vector.
// The least significant bits correspond to the first element in the stride,
// the most significant bits correspond to the last element in the stride.
// Counting leading zeros therefore tells how many elements at the end of the stride are zero,
// but the raw mask is not suitable for lzcnt: its 1's mark zero elements and its 0's mark non-zero elements.
// So we need to invert it first.
int lzcnt = BitOperations.LeadingZeroCount(~(uint)areEqual);
// As input number is represented by 2 bits in the mask, we need to divide lzcnt result by 2
// to get the exact number of zero elements in the stride
int strideRelativeIndex = 15 - (lzcnt / 2);
return (i * 16) + strideRelativeIndex;
}
}
return -1;
}
else
#endif
{
nint index = Size - 1;
ref short elemRef = ref Unsafe.As<Block8x8, short>(ref this);
while (index >= 0 && Unsafe.Add(ref elemRef, index) == 0)
{
index--;
}
return index;
}
}
/// <summary>

149
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs

@ -0,0 +1,149 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
#if SUPPORTS_RUNTIME_INTRINSICS
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal partial struct Block8x8F
{
/// <summary>
/// A number of rows of 8 scalar coefficients each in <see cref="Block8x8F"/>
/// </summary>
public const int RowCount = 8;
[FieldOffset(0)]
public Vector256<float> V0;
[FieldOffset(32)]
public Vector256<float> V1;
[FieldOffset(64)]
public Vector256<float> V2;
[FieldOffset(96)]
public Vector256<float> V3;
[FieldOffset(128)]
public Vector256<float> V4;
[FieldOffset(160)]
public Vector256<float> V5;
[FieldOffset(192)]
public Vector256<float> V6;
[FieldOffset(224)]
public Vector256<float> V7;
private static readonly Vector256<int> MultiplyIntoInt16ShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);
/// <summary>
/// Multiplies <paramref name="a"/> and <paramref name="b"/> element-wise, converts the products to
/// 32-bit integers and stores them in <paramref name="dest"/> as saturated 16-bit integers.
/// Requires Avx2 support.
/// </summary>
/// <param name="a">First factor block.</param>
/// <param name="b">Second factor block.</param>
/// <param name="dest">Block receiving the packed 16-bit results.</param>
private static unsafe void MultiplyIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");

    ref Vector256<float> leftRows = ref a.V0;
    ref Vector256<float> rightRows = ref b.V0;
    ref Vector256<short> packedRows = ref dest.V01;

    // Every destination vector holds two consecutive 8-element rows of the source blocks.
    for (nint pair = 0; pair < 4; pair++)
    {
        nint evenRow = pair * 2;
        nint oddRow = evenRow + 1;

        Vector256<float> evenProduct = Avx.Multiply(Unsafe.Add(ref leftRows, evenRow), Unsafe.Add(ref rightRows, evenRow));
        Vector256<int> evenInts = Avx.ConvertToVector256Int32(evenProduct);

        Vector256<float> oddProduct = Avx.Multiply(Unsafe.Add(ref leftRows, oddRow), Unsafe.Add(ref rightRows, oddRow));
        Vector256<int> oddInts = Avx.ConvertToVector256Int32(oddProduct);

        // The 256-bit pack works on each 128-bit lane separately, which interleaves the halves
        // of the two rows; the permutation puts the 64-bit chunks back into row order.
        Vector256<short> packed = Avx2.PackSignedSaturate(evenInts, oddInts);
        Unsafe.Add(ref packedRows, pair) = Avx2.PermuteVar8x32(packed.AsInt32(), MultiplyIntoInt16ShuffleMask).AsInt16();
    }
}
/// <summary>
/// Multiplies <paramref name="a"/> and <paramref name="b"/> element-wise, converts the products to
/// 32-bit integers and stores them in <paramref name="dest"/> as saturated 16-bit integers.
/// Requires Sse2 support.
/// </summary>
/// <param name="a">First factor block.</param>
/// <param name="b">Second factor block.</param>
/// <param name="dest">Block receiving the packed 16-bit results.</param>
private static void MultiplyIntoInt16_Sse2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Sse2.IsSupported, "Sse2 support is required to run this operation!");

    // Both float blocks are viewed as 16 vectors of 4 floats, the destination as 8 vectors of 8 shorts.
    ref Vector128<float> leftQuads = ref Unsafe.As<Block8x8F, Vector128<float>>(ref a);
    ref Vector128<float> rightQuads = ref Unsafe.As<Block8x8F, Vector128<float>>(ref b);
    ref Vector128<short> packedRows = ref Unsafe.As<Block8x8, Vector128<short>>(ref dest);

    for (int row = 0; row < 8; row++)
    {
        int firstQuad = row * 2;
        int secondQuad = firstQuad + 1;

        Vector128<float> firstProduct = Sse.Multiply(Unsafe.Add(ref leftQuads, firstQuad), Unsafe.Add(ref rightQuads, firstQuad));
        Vector128<int> firstInts = Sse2.ConvertToVector128Int32(firstProduct);

        Vector128<float> secondProduct = Sse.Multiply(Unsafe.Add(ref leftQuads, secondQuad), Unsafe.Add(ref rightQuads, secondQuad));
        Vector128<int> secondInts = Sse2.ConvertToVector128Int32(secondProduct);

        // Two 4-element integer vectors are narrowed into one 8-element row of shorts.
        Unsafe.Add(ref packedRows, row) = Sse2.PackSignedSaturate(firstInts, secondInts);
    }
}
/// <summary>
/// Avx implementation of the inplace transpose of this block.
/// </summary>
/// <remarks>
/// All eight source vectors (r0..r7) are read before the first write to a V* field,
/// so no separate destination block is needed.
/// </remarks>
private void TransposeInplace_Avx()
{
// https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536
// r0..r3: lower 128 bits kept from rows 0..3, upper 128 bits replaced by V4L..V7L
// (presumably the first four floats of rows 4..7 - the L/R field layout is only partly visible here).
Vector256<float> r0 = Avx.InsertVector128(
this.V0,
Unsafe.As<Vector4, Vector128<float>>(ref this.V4L),
1);
Vector256<float> r1 = Avx.InsertVector128(
this.V1,
Unsafe.As<Vector4, Vector128<float>>(ref this.V5L),
1);
Vector256<float> r2 = Avx.InsertVector128(
this.V2,
Unsafe.As<Vector4, Vector128<float>>(ref this.V6L),
1);
Vector256<float> r3 = Avx.InsertVector128(
this.V3,
Unsafe.As<Vector4, Vector128<float>>(ref this.V7L),
1);
// r4..r7: lower 128 bits taken from V0R..V3R, upper 128 bits from V4R..V7R.
// ToVector256() only widens the value; its upper half is overwritten by the insert.
Vector256<float> r4 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V4R),
1);
Vector256<float> r5 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V5R),
1);
Vector256<float> r6 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V6R),
1);
Vector256<float> r7 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V7R),
1);
// Each group below combines four of the r* vectors with unpack + shuffle + blend
// (the sequence from the linked answer) and produces two result rows.
// Result rows 0 and 1 (from the low unpack of r0..r3).
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
Vector256<float> t2 = Avx.UnpackLow(r2, r3);
Vector256<float> v = Avx.Shuffle(t0, t2, 0x4E);
this.V0 = Avx.Blend(t0, v, 0xCC);
this.V1 = Avx.Blend(t2, v, 0x33);
// Result rows 4 and 5 (from the low unpack of r4..r7).
Vector256<float> t4 = Avx.UnpackLow(r4, r5);
Vector256<float> t6 = Avx.UnpackLow(r6, r7);
v = Avx.Shuffle(t4, t6, 0x4E);
this.V4 = Avx.Blend(t4, v, 0xCC);
this.V5 = Avx.Blend(t6, v, 0x33);
// Result rows 2 and 3 (from the high unpack of r0..r3).
Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
v = Avx.Shuffle(t1, t3, 0x4E);
this.V2 = Avx.Blend(t1, v, 0xCC);
this.V3 = Avx.Blend(t3, v, 0x33);
// Result rows 6 and 7 (from the high unpack of r4..r7).
Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
Vector256<float> t7 = Avx.UnpackHigh(r6, r7);
v = Avx.Shuffle(t5, t7, 0x4E);
this.V6 = Avx.Blend(t5, v, 0xCC);
this.V7 = Avx.Blend(t7, v, 0x33);
}
}
}
#endif

2
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.ScaledCopyTo.cs

@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Numerics;

435
src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

@ -16,7 +16,7 @@ using System.Text;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
/// <summary>
/// Represents a Jpeg block with <see cref="float"/> coefficients.
/// 8x8 matrix of <see cref="float"/> coefficients.
/// </summary>
[StructLayout(LayoutKind.Explicit)]
internal partial struct Block8x8F : IEquatable<Block8x8F>
@ -66,30 +66,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public Vector4 V7L;
[FieldOffset(240)]
public Vector4 V7R;
#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// A number of rows of 8 scalar coefficients each in <see cref="Block8x8F"/>
/// </summary>
public const int RowCount = 8;
[FieldOffset(0)]
public Vector256<float> V0;
[FieldOffset(32)]
public Vector256<float> V1;
[FieldOffset(64)]
public Vector256<float> V2;
[FieldOffset(96)]
public Vector256<float> V3;
[FieldOffset(128)]
public Vector256<float> V4;
[FieldOffset(160)]
public Vector256<float> V5;
[FieldOffset(192)]
public Vector256<float> V6;
[FieldOffset(224)]
public Vector256<float> V7;
#endif
#pragma warning restore SA1600 // ElementsMustBeDocumented
/// <summary>
@ -102,7 +78,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
GuardBlockIndex(idx);
DebugGuard.MustBeBetweenOrEqualTo(idx, 0, Size - 1, nameof(idx));
ref float selfRef = ref Unsafe.As<Block8x8F, float>(ref this);
return Unsafe.Add(ref selfRef, (nint)(uint)idx);
}
@ -110,7 +86,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
[MethodImpl(MethodImplOptions.AggressiveInlining)]
set
{
GuardBlockIndex(idx);
DebugGuard.MustBeBetweenOrEqualTo(idx, 0, Size - 1, nameof(idx));
ref float selfRef = ref Unsafe.As<Block8x8F, float>(ref this);
Unsafe.Add(ref selfRef, (nint)(uint)idx) = value;
}
@ -188,13 +164,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
return result;
}
/// <summary>
/// Fill the block with defaults (zeroes).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public void Clear()
=> this = default; // The cheapest way to do this in C#:
/// <summary>
/// Load raw 32bit floating point data from source.
/// </summary>
@ -302,7 +271,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public float[] ToArray()
{
var result = new float[Size];
float[] result = new float[Size];
this.ScaledCopyTo(result);
return result;
}
@ -434,102 +403,37 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
}
/// <summary>
/// Quantize the block.
/// Quantize input block, apply zig-zag ordering and store result as 16bit integers.
/// </summary>
/// <param name="blockPtr">The block pointer.</param>
/// <param name="qtPtr">The qt pointer.</param>
/// <param name="unzigPtr">Unzig pointer</param>
public static unsafe void DequantizeBlock(Block8x8F* blockPtr, Block8x8F* qtPtr, byte* unzigPtr)
{
float* b = (float*)blockPtr;
float* qtp = (float*)qtPtr;
for (int qtIndex = 0; qtIndex < Size; qtIndex++)
{
byte blockIndex = unzigPtr[qtIndex];
float* unzigPos = b + blockIndex;
float val = *unzigPos;
val *= qtp[qtIndex];
*unzigPos = val;
}
}
/// <summary>
/// Quantize 'block' into 'dest' using the 'qt' quantization table:
/// Unzig the elements of block into dest, while dividing them by elements of qt and "pre-rounding" the values.
/// To finish the rounding it's enough to (int)-cast these values.
/// </summary>
/// <param name="block">Source block</param>
/// <param name="dest">Destination block</param>
/// <param name="qt">The quantization table</param>
/// <param name="unZig">The 8x8 Unzig block.</param>
public static unsafe void Quantize(
ref Block8x8F block,
ref Block8x8F dest,
ref Block8x8F qt,
ref ZigZag unZig)
/// <param name="block">Source block.</param>
/// <param name="dest">Destination block.</param>
/// <param name="qt">The quantization table.</param>
public static void Quantize(ref Block8x8F block, ref Block8x8 dest, ref Block8x8F qt)
{
for (int zig = 0; zig < Size; zig++)
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
dest[zig] = block[unZig[zig]];
MultiplyIntoInt16_Avx2(ref block, ref qt, ref dest);
ZigZag.ApplyZigZagOrderingAvx2(ref dest);
}
DivideRoundAll(ref dest, ref qt);
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
else if (Ssse3.IsSupported)
{
var vnegOne = Vector256.Create(-1f);
var vadd = Vector256.Create(.5F);
var vone = Vector256.Create(1f);
for (int i = 0; i < RowCount; i++)
{
ref Vector256<float> aRow = ref Unsafe.Add(ref a.V0, i);
ref Vector256<float> bRow = ref Unsafe.Add(ref b.V0, i);
Vector256<float> voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aRow), vone), vadd);
aRow = Avx.Add(Avx.Divide(aRow, bRow), voff);
}
MultiplyIntoInt16_Sse2(ref block, ref qt, ref dest);
ZigZag.ApplyZigZagOrderingSsse3(ref dest);
}
else
#endif
{
a.V0L = DivideRound(a.V0L, b.V0L);
a.V0R = DivideRound(a.V0R, b.V0R);
a.V1L = DivideRound(a.V1L, b.V1L);
a.V1R = DivideRound(a.V1R, b.V1R);
a.V2L = DivideRound(a.V2L, b.V2L);
a.V2R = DivideRound(a.V2R, b.V2R);
a.V3L = DivideRound(a.V3L, b.V3L);
a.V3R = DivideRound(a.V3R, b.V3R);
a.V4L = DivideRound(a.V4L, b.V4L);
a.V4R = DivideRound(a.V4R, b.V4R);
a.V5L = DivideRound(a.V5L, b.V5L);
a.V5R = DivideRound(a.V5R, b.V5R);
a.V6L = DivideRound(a.V6L, b.V6L);
a.V6R = DivideRound(a.V6R, b.V6R);
a.V7L = DivideRound(a.V7L, b.V7L);
a.V7R = DivideRound(a.V7R, b.V7R);
for (int i = 0; i < Size; i++)
{
int idx = ZigZag.ZigZagOrder[i];
float quantizedVal = block[idx] * qt[idx];
quantizedVal += quantizedVal < 0 ? -0.5f : 0.5f;
dest[i] = (short)quantizedVal;
}
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor)
{
var neg = new Vector4(-1);
var add = new Vector4(.5F);
// sign(dividend) = max(min(dividend, 1), -1)
Vector4 sign = Numerics.Clamp(dividend, neg, Vector4.One);
// AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend)
return (dividend / divisor) + (sign * add);
}
public void RoundInto(ref Block8x8 dest)
{
for (int i = 0; i < Size; i++)
@ -627,6 +531,47 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
Unsafe.Add(ref dRef, 7) = bottom;
}
/// <summary>
/// Compares entire 8x8 block to a single scalar value.
/// </summary>
/// <remarks>
/// Every coefficient is converted to <see cref="int"/> with truncation before it is compared,
/// so fractional parts are ignored.
/// </remarks>
/// <param name="value">Value to compare to.</param>
/// <returns>
/// <see langword="true"/> if every truncated coefficient equals <paramref name="value"/>; otherwise <see langword="false"/>.
/// </returns>
public bool EqualsToScalar(int value)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported)
    {
        // MoveMask yields one bit per byte; all 32 bits set means all 8 int lanes compared equal.
        const int equalityMask = unchecked((int)0b1111_1111_1111_1111_1111_1111_1111_1111);

        var targetVector = Vector256.Create(value);
        ref Vector256<float> blockStride = ref this.V0;

        for (int i = 0; i < RowCount; i++)
        {
            Vector256<int> areEqual = Avx2.CompareEqual(Avx.ConvertToVector256Int32WithTruncation(Unsafe.Add(ref blockStride, i)), targetVector);
            if (Avx2.MoveMask(areEqual.AsByte()) != equalityMask)
            {
                return false;
            }
        }

        return true;
    }
#endif
    {
        ref float scalars = ref Unsafe.As<Block8x8F, float>(ref this);

        for (int i = 0; i < Size; i++)
        {
            if ((int)Unsafe.Add(ref scalars, i) != value)
            {
                return false;
            }
        }

        return true;
    }
}
/// <inheritdoc />
public bool Equals(Block8x8F other)
=> this.V0L == other.V0L
@ -663,213 +608,89 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
return sb.ToString();
}
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector<float> NormalizeAndRound(Vector<float> row, Vector<float> off, Vector<float> max)
{
row += off;
row = Vector.Max(row, Vector<float>.Zero);
row = Vector.Min(row, max);
return row.FastRound();
}
[Conditional("DEBUG")]
private static void GuardBlockIndex(int idx)
{
DebugGuard.MustBeLessThan(idx, Size, nameof(idx));
DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx));
}
/// <summary>
/// Transpose the block into the destination block.
/// Transpose the block inplace.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
public void TransposeInplace()
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
{
// https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536
Vector256<float> r0 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V0L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V4L),
1);
Vector256<float> r1 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V1L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V5L),
1);
Vector256<float> r2 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V2L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V6L),
1);
Vector256<float> r3 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V3L).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V7L),
1);
Vector256<float> r4 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V4R),
1);
Vector256<float> r5 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V5R),
1);
Vector256<float> r6 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V6R),
1);
Vector256<float> r7 = Avx.InsertVector128(
Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256(),
Unsafe.As<Vector4, Vector128<float>>(ref this.V7R),
1);
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
Vector256<float> t2 = Avx.UnpackLow(r2, r3);
Vector256<float> v = Avx.Shuffle(t0, t2, 0x4E);
d.V0 = Avx.Blend(t0, v, 0xCC);
d.V1 = Avx.Blend(t2, v, 0x33);
Vector256<float> t4 = Avx.UnpackLow(r4, r5);
Vector256<float> t6 = Avx.UnpackLow(r6, r7);
v = Avx.Shuffle(t4, t6, 0x4E);
d.V4 = Avx.Blend(t4, v, 0xCC);
d.V5 = Avx.Blend(t6, v, 0x33);
Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
v = Avx.Shuffle(t1, t3, 0x4E);
d.V2 = Avx.Blend(t1, v, 0xCC);
d.V3 = Avx.Blend(t3, v, 0x33);
Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
Vector256<float> t7 = Avx.UnpackHigh(r6, r7);
v = Avx.Shuffle(t5, t7, 0x4E);
d.V6 = Avx.Blend(t5, v, 0xCC);
d.V7 = Avx.Blend(t7, v, 0x33);
this.TransposeInplace_Avx();
}
else
#endif
{
d.V0L.X = this.V0L.X;
d.V1L.X = this.V0L.Y;
d.V2L.X = this.V0L.Z;
d.V3L.X = this.V0L.W;
d.V4L.X = this.V0R.X;
d.V5L.X = this.V0R.Y;
d.V6L.X = this.V0R.Z;
d.V7L.X = this.V0R.W;
d.V0L.Y = this.V1L.X;
d.V1L.Y = this.V1L.Y;
d.V2L.Y = this.V1L.Z;
d.V3L.Y = this.V1L.W;
d.V4L.Y = this.V1R.X;
d.V5L.Y = this.V1R.Y;
d.V6L.Y = this.V1R.Z;
d.V7L.Y = this.V1R.W;
d.V0L.Z = this.V2L.X;
d.V1L.Z = this.V2L.Y;
d.V2L.Z = this.V2L.Z;
d.V3L.Z = this.V2L.W;
d.V4L.Z = this.V2R.X;
d.V5L.Z = this.V2R.Y;
d.V6L.Z = this.V2R.Z;
d.V7L.Z = this.V2R.W;
d.V0L.W = this.V3L.X;
d.V1L.W = this.V3L.Y;
d.V2L.W = this.V3L.Z;
d.V3L.W = this.V3L.W;
d.V4L.W = this.V3R.X;
d.V5L.W = this.V3R.Y;
d.V6L.W = this.V3R.Z;
d.V7L.W = this.V3R.W;
d.V0R.X = this.V4L.X;
d.V1R.X = this.V4L.Y;
d.V2R.X = this.V4L.Z;
d.V3R.X = this.V4L.W;
d.V4R.X = this.V4R.X;
d.V5R.X = this.V4R.Y;
d.V6R.X = this.V4R.Z;
d.V7R.X = this.V4R.W;
d.V0R.Y = this.V5L.X;
d.V1R.Y = this.V5L.Y;
d.V2R.Y = this.V5L.Z;
d.V3R.Y = this.V5L.W;
d.V4R.Y = this.V5R.X;
d.V5R.Y = this.V5R.Y;
d.V6R.Y = this.V5R.Z;
d.V7R.Y = this.V5R.W;
d.V0R.Z = this.V6L.X;
d.V1R.Z = this.V6L.Y;
d.V2R.Z = this.V6L.Z;
d.V3R.Z = this.V6L.W;
d.V4R.Z = this.V6R.X;
d.V5R.Z = this.V6R.Y;
d.V6R.Z = this.V6R.Z;
d.V7R.Z = this.V6R.W;
d.V0R.W = this.V7L.X;
d.V1R.W = this.V7L.Y;
d.V2R.W = this.V7L.Z;
d.V3R.W = this.V7L.W;
d.V4R.W = this.V7R.X;
d.V5R.W = this.V7R.Y;
d.V6R.W = this.V7R.Z;
d.V7R.W = this.V7R.W;
this.TransposeInplace_Scalar();
}
}
/// <summary>
/// Compares entire 8x8 block to a single scalar value.
/// Scalar inplace transpose implementation for <see cref="TransposeInplace"/>
/// </summary>
/// <param name="value">Value to compare to.</param>
public bool EqualsToScalar(int value)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
[MethodImpl(InliningOptions.ShortMethod)]
private void TransposeInplace_Scalar()
{
ref float elemRef = ref Unsafe.As<Block8x8F, float>(ref this);
// row #0
Swap(ref Unsafe.Add(ref elemRef, 1), ref Unsafe.Add(ref elemRef, 8));
Swap(ref Unsafe.Add(ref elemRef, 2), ref Unsafe.Add(ref elemRef, 16));
Swap(ref Unsafe.Add(ref elemRef, 3), ref Unsafe.Add(ref elemRef, 24));
Swap(ref Unsafe.Add(ref elemRef, 4), ref Unsafe.Add(ref elemRef, 32));
Swap(ref Unsafe.Add(ref elemRef, 5), ref Unsafe.Add(ref elemRef, 40));
Swap(ref Unsafe.Add(ref elemRef, 6), ref Unsafe.Add(ref elemRef, 48));
Swap(ref Unsafe.Add(ref elemRef, 7), ref Unsafe.Add(ref elemRef, 56));
// row #1
Swap(ref Unsafe.Add(ref elemRef, 10), ref Unsafe.Add(ref elemRef, 17));
Swap(ref Unsafe.Add(ref elemRef, 11), ref Unsafe.Add(ref elemRef, 25));
Swap(ref Unsafe.Add(ref elemRef, 12), ref Unsafe.Add(ref elemRef, 33));
Swap(ref Unsafe.Add(ref elemRef, 13), ref Unsafe.Add(ref elemRef, 41));
Swap(ref Unsafe.Add(ref elemRef, 14), ref Unsafe.Add(ref elemRef, 49));
Swap(ref Unsafe.Add(ref elemRef, 15), ref Unsafe.Add(ref elemRef, 57));
// row #2
Swap(ref Unsafe.Add(ref elemRef, 19), ref Unsafe.Add(ref elemRef, 26));
Swap(ref Unsafe.Add(ref elemRef, 20), ref Unsafe.Add(ref elemRef, 34));
Swap(ref Unsafe.Add(ref elemRef, 21), ref Unsafe.Add(ref elemRef, 42));
Swap(ref Unsafe.Add(ref elemRef, 22), ref Unsafe.Add(ref elemRef, 50));
Swap(ref Unsafe.Add(ref elemRef, 23), ref Unsafe.Add(ref elemRef, 58));
// row #3
Swap(ref Unsafe.Add(ref elemRef, 28), ref Unsafe.Add(ref elemRef, 35));
Swap(ref Unsafe.Add(ref elemRef, 29), ref Unsafe.Add(ref elemRef, 43));
Swap(ref Unsafe.Add(ref elemRef, 30), ref Unsafe.Add(ref elemRef, 51));
Swap(ref Unsafe.Add(ref elemRef, 31), ref Unsafe.Add(ref elemRef, 59));
// row #4
Swap(ref Unsafe.Add(ref elemRef, 37), ref Unsafe.Add(ref elemRef, 44));
Swap(ref Unsafe.Add(ref elemRef, 38), ref Unsafe.Add(ref elemRef, 52));
Swap(ref Unsafe.Add(ref elemRef, 39), ref Unsafe.Add(ref elemRef, 60));
// row #5
Swap(ref Unsafe.Add(ref elemRef, 46), ref Unsafe.Add(ref elemRef, 53));
Swap(ref Unsafe.Add(ref elemRef, 47), ref Unsafe.Add(ref elemRef, 61));
// row #6
Swap(ref Unsafe.Add(ref elemRef, 55), ref Unsafe.Add(ref elemRef, 62));
static void Swap(ref float a, ref float b)
{
const int equalityMask = unchecked((int)0b1111_1111_1111_1111_1111_1111_1111_1111);
var targetVector = Vector256.Create(value);
ref Vector256<float> blockStride = ref this.V0;
for (int i = 0; i < RowCount; i++)
{
Vector256<int> areEqual = Avx2.CompareEqual(Avx.ConvertToVector256Int32WithTruncation(Unsafe.Add(ref this.V0, i)), targetVector);
if (Avx2.MoveMask(areEqual.AsByte()) != equalityMask)
{
return false;
}
}
return true;
float tmp = a;
a = b;
b = tmp;
}
#endif
{
ref float scalars = ref Unsafe.As<Block8x8F, float>(ref this);
for (int i = 0; i < Size; i++)
{
if ((int)Unsafe.Add(ref scalars, i) != value)
{
return false;
}
}
}
return true;
}
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector<float> NormalizeAndRound(Vector<float> row, Vector<float> off, Vector<float> max)
{
row += off;
row = Vector.Max(row, Vector<float>.Zero);
row = Vector.Min(row, max);
return row.FastRound();
}
}
}

19
src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs

@ -58,11 +58,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// </summary>
private readonly HuffmanTable[] acHuffmanTables;
/// <summary>
/// The unzig data.
/// </summary>
private ZigZag dctZigZag;
private HuffmanScanBuffer scanBuffer;
private readonly SpectralConverter spectralConverter;
@ -80,7 +75,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
SpectralConverter converter,
CancellationToken cancellationToken)
{
this.dctZigZag = ZigZag.CreateUnzigTable();
this.stream = stream;
this.spectralConverter = converter;
this.cancellationToken = cancellationToken;
@ -483,7 +477,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
ref short blockDataRef = ref Unsafe.As<Block8x8, short>(ref block);
ref HuffmanScanBuffer buffer = ref this.scanBuffer;
ref ZigZag zigzag = ref this.dctZigZag;
// DC
int t = buffer.DecodeHuffman(ref dcTable);
@ -508,7 +501,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
i += r;
s = buffer.Receive(s);
Unsafe.Add(ref blockDataRef, zigzag[i++]) = (short)s;
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i++]) = (short)s;
}
else
{
@ -562,7 +555,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
}
ref HuffmanScanBuffer buffer = ref this.scanBuffer;
ref ZigZag zigzag = ref this.dctZigZag;
int start = this.SpectralStart;
int end = this.SpectralEnd;
int low = this.SuccessiveLow;
@ -578,7 +570,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if (s != 0)
{
s = buffer.Receive(s);
Unsafe.Add(ref blockDataRef, zigzag[i]) = (short)(s << low);
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i]) = (short)(s << low);
}
else
{
@ -608,7 +600,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
// Refinement scan for these AC coefficients
ref HuffmanScanBuffer buffer = ref this.scanBuffer;
ref ZigZag zigzag = ref this.dctZigZag;
int start = this.SpectralStart;
int end = this.SpectralEnd;
@ -655,7 +646,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
do
{
ref short coef = ref Unsafe.Add(ref blockDataRef, zigzag[k]);
ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]);
if (coef != 0)
{
buffer.CheckBits();
@ -681,7 +672,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
if ((s != 0) && (k < 64))
{
Unsafe.Add(ref blockDataRef, zigzag[k]) = (short)s;
Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]) = (short)s;
}
}
}
@ -690,7 +681,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
{
for (; k <= end; k++)
{
ref short coef = ref Unsafe.Add(ref blockDataRef, zigzag[k]);
ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]);
if (coef != 0)
{

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/IRawJpegData.cs

@ -22,7 +22,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
IJpegComponent[] Components { get; }
/// <summary>
/// Gets the quantization tables, in zigzag order.
/// Gets the quantization tables, in natural order.
/// </summary>
Block8x8F[] QuantizationTables { get; }
}

26
src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs

@ -19,14 +19,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
public Block8x8F SourceBlock;
/// <summary>
/// Temporal block 1 to store intermediate and/or final computation results.
/// Temporal block to store intermediate computation results.
/// </summary>
public Block8x8F WorkspaceBlock1;
/// <summary>
/// Temporal block 2 to store intermediate and/or final computation results.
/// </summary>
public Block8x8F WorkspaceBlock2;
public Block8x8F WorkspaceBlock;
/// <summary>
/// The quantization table as <see cref="Block8x8F"/>.
@ -46,12 +41,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
public JpegBlockPostProcessor(IRawJpegData decoder, IJpegComponent component)
{
int qtIndex = component.QuantizationTableIndex;
this.DequantiazationTable = ZigZag.CreateDequantizationTable(ref decoder.QuantizationTables[qtIndex]);
this.DequantiazationTable = decoder.QuantizationTables[qtIndex];
this.subSamplingDivisors = component.SubSamplingDivisors;
this.SourceBlock = default;
this.WorkspaceBlock1 = default;
this.WorkspaceBlock2 = default;
this.WorkspaceBlock = default;
}
/// <summary>
@ -71,20 +65,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
int destAreaStride,
float maximumValue)
{
ref Block8x8F b = ref this.SourceBlock;
b.LoadFrom(ref sourceBlock);
ref Block8x8F block = ref this.SourceBlock;
block.LoadFrom(ref sourceBlock);
// Dequantize:
b.MultiplyInPlace(ref this.DequantiazationTable);
block.MultiplyInPlace(ref this.DequantiazationTable);
FastFloatingPointDCT.TransformIDCT(ref b, ref this.WorkspaceBlock1, ref this.WorkspaceBlock2);
FastFloatingPointDCT.TransformIDCT(ref block, ref this.WorkspaceBlock);
// To conform better to libjpeg we actually NEED TO lose precision here.
// This is because they store blocks as Int16 between all the operations.
// To be "more accurate", we need to emulate this by rounding!
this.WorkspaceBlock1.NormalizeColorsAndRoundInPlace(maximumValue);
block.NormalizeColorsAndRoundInPlace(maximumValue);
this.WorkspaceBlock1.ScaledCopyTo(
block.ScaledCopyTo(
ref destAreaOrigin,
destAreaStride,
this.subSamplingDivisors.Width,

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs

@ -39,6 +39,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
/// <param name="frame">The jpeg frame with the color space to convert to.</param>
/// <param name="jpegData">The raw JPEG data.</param>
/// <returns>The color converter.</returns>
public virtual JpegColorConverter GetColorConverter(JpegFrame frame, IRawJpegData jpegData) => JpegColorConverter.GetConverter(jpegData.ColorSpace, frame.Precision);
protected virtual JpegColorConverter GetColorConverter(JpegFrame frame, IRawJpegData jpegData) => JpegColorConverter.GetConverter(jpegData.ColorSpace, frame.Precision);
}
}

21
src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs

@ -5,10 +5,25 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
/// <summary>
/// A compiled look-up table representation of a huffmanSpec.
/// Each value maps to a int32 of which the 24 most significant bits hold the
/// codeword in bits and the 8 least significant bits hold the codeword size.
/// The maximum codeword size is 16 bits.
/// </summary>
/// <remarks>
/// <para>
/// Each value maps to a int32 of which the 24 most significant bits hold the
/// codeword in bits and the 8 least significant bits hold the codeword size.
/// </para>
/// <para>
/// Code value occupies 24 most significant bits as integer value.
/// This value is shifted to the MSB position for performance reasons.
/// For example, decimal value 10 is stored like this:
/// <code>
/// MSB LSB
/// 1010 0000 00000000 00000000 | 00000100
/// </code>
/// This was done to eliminate extra binary shifts in the encoder.
/// While code length is represented as 8 bit integer value
/// </para>
/// </remarks>
internal readonly struct HuffmanLut
{
/// <summary>
@ -54,7 +69,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
int len = i + 1;
for (int j = 0; j < spec.Count[i]; j++)
{
this.Values[spec.Values[k]] = len | (code << 8);
this.Values[spec.Values[k]] = len | (code << (32 - len));
code++;
k++;
}

512
src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs

@ -1,12 +1,11 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.IO;
using System.Numerics;
using System.Runtime.CompilerServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using System.Runtime.InteropServices;
using System.Threading;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
@ -16,49 +15,118 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
internal class HuffmanScanEncoder
{
/// <summary>
/// Compiled huffman tree to encode given values.
/// Maximum number of bytes encoded jpeg 8x8 block can occupy.
/// It's highly unlikely for block to occupy this much space - it's a theoretical limit.
/// </summary>
/// <remarks>Yields codewords by index consisting of [run length | bitsize].</remarks>
private HuffmanLut[] huffmanTables;
/// <remarks>
/// Where 16 is maximum huffman code binary length according to itu
/// specs. 10 is maximum value binary length, value comes from discrete
/// cosine transform with value range: [-1024..1023]. Block stores
/// 8x8 = 64 values thus multiplication by 64. Then divided by 8 to get
/// the number of bytes. This value is then multiplied by
/// <see cref="MaxBytesPerBlockMultiplier"/> for performance reasons.
/// </remarks>
private const int MaxBytesPerBlock = (16 + 10) * 64 / 8 * MaxBytesPerBlockMultiplier;
/// <summary>
/// Number of bytes cached before being written to target stream via Stream.Write(byte[], offest, count).
/// Multiplier used within cache buffers size calculation.
/// </summary>
/// <remarks>
/// This is subject to change, 1024 seems to be the best value in terms of performance.
/// <see cref="Emit(int, int)"/> expects it to be at least 8 (see comments in method body).
/// <para>
/// Theoretically, <see cref="MaxBytesPerBlock"/> bytes buffer can fit
/// exactly one minimal coding unit. In reality, coding blocks occupy much
/// less space than the theoretical maximum - this can be exploited.
/// If temporal buffer size is multiplied by at least 2, second half of
/// the resulting buffer will be used as an overflow 'guard' if next
/// block would occupy maximum number of bytes. While first half may fit
/// many blocks before needing to flush.
/// </para>
/// <para>
/// This is subject to change. This can be equal to 1 but recommended
/// value is 2 or even greater - further benchmarking needed.
/// </para>
/// </remarks>
private const int EmitBufferSizeInBytes = 1024;
private const int MaxBytesPerBlockMultiplier = 2;
/// <summary>
/// A buffer for reducing the number of stream writes when emitting Huffman tables.
/// <see cref="streamWriteBuffer"/> size multiplier.
/// </summary>
private readonly byte[] emitBuffer = new byte[EmitBufferSizeInBytes];
/// <remarks>
/// Jpeg specification requires inserting 'stuff' bytes after each
/// 0xff byte value. Worst case scenario is when all bytes are 0xff.
/// While it's highly unlikely (if not impossible) to get such
/// combination, it's theoretically possible so buffer size must be guarded.
/// </remarks>
private const int OutputBufferLengthMultiplier = 2;
/// <summary>
/// Number of filled bytes in <see cref="emitBuffer"/> buffer
/// Compiled huffman tree to encode given values.
/// </summary>
private int emitLen = 0;
/// <remarks>Yields codewords by index consisting of [run length | bitsize].</remarks>
private HuffmanLut[] huffmanTables;
/// <summary>
/// Emitted bits 'micro buffer' before being transferred to the <see cref="emitBuffer"/>.
/// </summary>
private int accumulatedBits;
private uint accumulatedBits;
/// <summary>
/// Buffer for temporal storage of huffman rle encoding bit data.
/// </summary>
/// <remarks>
/// Encoding bits are assembled to 4 byte unsigned integers and then copied to this buffer.
/// This process does NOT include inserting stuff bytes.
/// </remarks>
private readonly uint[] emitBuffer;
/// <summary>
/// Buffer for temporal storage which is then written to the output stream.
/// </summary>
/// <remarks>
/// Encoding bits from <see cref="emitBuffer"/> are copied to this byte buffer including stuff bytes.
/// </remarks>
private readonly byte[] streamWriteBuffer;
/// <summary>
/// Number of jagged bits stored in <see cref="accumulatedBits"/>
/// </summary>
private int bitCount;
private Block8x8F temporalBlock1;
private Block8x8F temporalBlock2;
private int emitWriteIndex;
private Block8x8 tempBlock;
/// <summary>
/// The output stream. All attempted writes after the first error become no-ops.
/// </summary>
private readonly Stream target;
public HuffmanScanEncoder(Stream outputStream) => this.target = outputStream;
/// <summary>
/// Initializes a new instance of the <see cref="HuffmanScanEncoder"/> class.
/// </summary>
/// <param name="blocksPerCodingUnit">Number of encoded 8x8 blocks per single jpeg macroblock.</param>
/// <param name="outputStream">Stream which receives the encoded data.</param>
public HuffmanScanEncoder(int blocksPerCodingUnit, Stream outputStream)
{
    this.target = outputStream;

    // Cache size scales with the number of blocks in a single coding unit.
    int cacheSizeInBytes = blocksPerCodingUnit * MaxBytesPerBlock;

    // Bits are cached as 4-byte words; the write index starts at the end of the array.
    this.emitBuffer = new uint[cacheSizeInBytes / sizeof(uint)];
    this.emitWriteIndex = this.emitBuffer.Length;

    // Byte buffer handed to the stream, enlarged by OutputBufferLengthMultiplier.
    this.streamWriteBuffer = new byte[cacheSizeInBytes * OutputBufferLengthMultiplier];
}
/// <summary>
/// Gets a value indicating whether <see cref="emitBuffer"/> should be
/// flushed using <see cref="FlushToStream()"/> before encoding the next
/// coding unit.
/// </summary>
/// <remarks>
/// The value is true once <see cref="emitWriteIndex"/> has dropped below half
/// of the <see cref="emitBuffer"/> length. The write index starts at the buffer
/// length and is decremented for every cached 4-byte word, so a smaller index
/// means a fuller buffer.
/// </remarks>
private bool IsStreamFlushNeeded
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get => this.emitWriteIndex < (uint)this.emitBuffer.Length / 2;
}
/// <summary>
/// Encodes the image with no subsampling.
@ -71,9 +139,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
public void Encode444<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
FastFloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
FastFloatingPointDCT.AdjustToFDCT(ref chrominanceQuantTable);
var unzig = ZigZag.CreateUnzigTable();
this.huffmanTables = HuffmanLut.TheHuffmanLut;
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
@ -97,26 +166,28 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.Y,
ref luminanceQuantTable,
ref unzig);
ref luminanceQuantTable);
prevDCCb = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCb,
ref pixelConverter.Cb,
ref chrominanceQuantTable,
ref unzig);
ref chrominanceQuantTable);
prevDCCr = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCr,
ref pixelConverter.Cr,
ref chrominanceQuantTable,
ref unzig);
ref chrominanceQuantTable);
if (this.IsStreamFlushNeeded)
{
this.FlushToStream();
}
}
}
this.FlushInternalBuffer();
this.FlushRemainingBytes();
}
/// <summary>
@ -131,9 +202,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
public void Encode420<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
FastFloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
FastFloatingPointDCT.AdjustToFDCT(ref chrominanceQuantTable);
var unzig = ZigZag.CreateUnzigTable();
this.huffmanTables = HuffmanLut.TheHuffmanLut;
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
@ -158,34 +230,35 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.YLeft,
ref luminanceQuantTable,
ref unzig);
ref luminanceQuantTable);
prevDCY = this.WriteBlock(
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.YRight,
ref luminanceQuantTable,
ref unzig);
ref luminanceQuantTable);
}
prevDCCb = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCb,
ref pixelConverter.Cb,
ref chrominanceQuantTable,
ref unzig);
ref chrominanceQuantTable);
prevDCCr = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCr,
ref pixelConverter.Cr,
ref chrominanceQuantTable,
ref unzig);
ref chrominanceQuantTable);
if (this.IsStreamFlushNeeded)
{
this.FlushToStream();
}
}
}
this.FlushInternalBuffer();
this.FlushRemainingBytes();
}
/// <summary>
@ -198,9 +271,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
public void EncodeGrayscale<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
FastFloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
var unzig = ZigZag.CreateUnzigTable();
this.huffmanTables = HuffmanLut.TheHuffmanLut;
// ReSharper disable once InconsistentNaming
int prevDCY = 0;
@ -223,12 +296,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.Y,
ref luminanceQuantTable,
ref unzig);
ref luminanceQuantTable);
if (this.IsStreamFlushNeeded)
{
this.FlushToStream();
}
}
}
this.FlushInternalBuffer();
this.FlushRemainingBytes();
}
/// <summary>
@ -236,14 +313,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="pixels">The pixel accessor providing access to the image pixels.</param>
/// <param name="luminanceQuantTable">Luminance quantization table provided by the callee.</param>
/// <param name="quantTable">Quantization table provided by the caller.</param>
/// <param name="cancellationToken">The token to monitor for cancellation.</param>
public void EncodeRgb<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, CancellationToken cancellationToken)
public void EncodeRgb<TPixel>(Image<TPixel> pixels, ref Block8x8F quantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
FastFloatingPointDCT.AdjustToFDCT(ref quantTable);
var unzig = ZigZag.CreateUnzigTable();
this.huffmanTables = HuffmanLut.TheHuffmanLut;
// ReSharper disable once InconsistentNaming
int prevDCR = 0, prevDCG = 0, prevDCB = 0;
@ -267,26 +344,28 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
QuantIndex.Luminance,
prevDCR,
ref pixelConverter.R,
ref luminanceQuantTable,
ref unzig);
ref quantTable);
prevDCG = this.WriteBlock(
QuantIndex.Luminance,
prevDCG,
ref pixelConverter.G,
ref luminanceQuantTable,
ref unzig);
ref quantTable);
prevDCB = this.WriteBlock(
QuantIndex.Luminance,
prevDCB,
ref pixelConverter.B,
ref luminanceQuantTable,
ref unzig);
ref quantTable);
if (this.IsStreamFlushNeeded)
{
this.FlushToStream();
}
}
}
this.FlushInternalBuffer();
this.FlushRemainingBytes();
}
/// <summary>
@ -296,47 +375,53 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
/// </summary>
/// <param name="index">The quantization table index.</param>
/// <param name="prevDC">The previous DC value.</param>
/// <param name="src">Source block</param>
/// <param name="quant">Quantization table</param>
/// <param name="unZig">The 8x8 Unzig block.</param>
/// <param name="block">Source block.</param>
/// <param name="quant">Quantization table.</param>
/// <returns>The <see cref="int"/>.</returns>
private int WriteBlock(
QuantIndex index,
int prevDC,
ref Block8x8F src,
ref Block8x8F quant,
ref ZigZag unZig)
ref Block8x8F block,
ref Block8x8F quant)
{
ref Block8x8F refTemp1 = ref this.temporalBlock1;
ref Block8x8F refTemp2 = ref this.temporalBlock2;
ref Block8x8 spectralBlock = ref this.tempBlock;
FastFloatingPointDCT.TransformFDCT(ref src, ref refTemp1, ref refTemp2);
// Shifting level from 0..255 to -128..127
block.AddInPlace(-128f);
Block8x8F.Quantize(ref refTemp1, ref refTemp2, ref quant, ref unZig);
// Discrete cosine transform
FastFloatingPointDCT.TransformFDCT(ref block);
// Quantization
Block8x8F.Quantize(ref block, ref spectralBlock, ref quant);
// Emit the DC delta.
int dc = (int)refTemp2[0];
this.EmitDirectCurrentTerm(this.huffmanTables[2 * (int)index].Values, dc - prevDC);
int dc = spectralBlock[0];
this.EmitHuffRLE(this.huffmanTables[2 * (int)index].Values, 0, dc - prevDC);
// Emit the AC components.
int[] acHuffTable = this.huffmanTables[(2 * (int)index) + 1].Values;
nint lastValuableIndex = spectralBlock.GetLastNonZeroIndex();
int runLength = 0;
int lastValuableIndex = GetLastValuableElementIndex(ref refTemp2);
for (int zig = 1; zig <= lastValuableIndex; zig++)
ref short blockRef = ref Unsafe.As<Block8x8, short>(ref spectralBlock);
for (nint zig = 1; zig <= lastValuableIndex; zig++)
{
int ac = (int)refTemp2[zig];
const int zeroRun1 = 1 << 4;
const int zeroRun16 = 16 << 4;
int ac = Unsafe.Add(ref blockRef, zig);
if (ac == 0)
{
runLength++;
runLength += zeroRun1;
}
else
{
while (runLength > 15)
while (runLength >= zeroRun16)
{
this.EmitHuff(acHuffTable, 0xf0);
runLength -= 16;
runLength -= zeroRun16;
}
this.EmitHuffRLE(acHuffTable, runLength, ac);
@ -356,100 +441,89 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
}
/// <summary>
/// Emits the least significant count of bits to the stream write buffer.
/// The precondition is bits
/// <example>
/// &lt; 1&lt;&lt;nBits &amp;&amp; nBits &lt;= 16
/// </example>
/// .
/// Emits the most significant count of bits to the buffer.
/// </summary>
/// <param name="bits">The packed bits.</param>
/// <param name="count">The number of bits</param>
/// <remarks>
/// <para>
/// Supports up to 32 count of bits but, generally speaking, jpeg
/// standard assures that there won't be more than 16 bits per single
/// value.
/// </para>
/// <para>
/// Emitting algorithm uses 3 intermediate buffers for caching before
/// writing to the stream:
/// <list type="number">
/// <item>
/// <term>uint32</term>
/// <description>
/// Bit buffer. Encoded spectral values can occupy up to 16 bits, bits
/// are assembled to whole bytes via this intermediate buffer.
/// </description>
/// </item>
/// <item>
/// <term>uint32[]</term>
/// <description>
/// Assembled bytes from uint32 buffer are saved into this buffer.
/// uint32 buffer values are saved using indices from the last to the first.
/// As bytes are saved to the memory as 4-byte packages endianness matters:
/// Jpeg stream is big-endian, indexing buffer bytes from the last index to the
/// first eliminates all operations to extract separate bytes. This only works for
/// little-endian machines (there are no known examples of big-endian users atm).
/// For big-endians this approach is slower due to the separate byte extraction.
/// </description>
/// </item>
/// <item>
/// <term>byte[]</term>
/// <description>
/// Byte buffer used only during <see cref="FlushToStream(int)"/> method.
/// </description>
/// </item>
/// </list>
/// </para>
/// </remarks>
/// <param name="bits">Bits to emit, must be shifted to the left.</param>
/// <param name="count">Bits count stored in the bits parameter.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void Emit(int bits, int count)
private void Emit(uint bits, int count)
{
this.accumulatedBits |= bits >> this.bitCount;
count += this.bitCount;
bits <<= 32 - count;
bits |= this.accumulatedBits;
// Only write if more than 8 bits.
if (count >= 8)
if (count >= 32)
{
// Track length
while (count >= 8)
{
byte b = (byte)(bits >> 24);
this.emitBuffer[this.emitLen++] = b;
// Adding stuff byte
// This is because by JPEG standard scan data can contain JPEG markers (indicated by the 0xFF byte, followed by a non-zero byte)
// Considering this every 0xFF byte must be followed by 0x00 padding byte to signal that this is not a marker
if (b == byte.MaxValue)
{
this.emitBuffer[this.emitLen++] = byte.MinValue;
}
bits <<= 8;
count -= 8;
}
this.emitBuffer[--this.emitWriteIndex] = this.accumulatedBits;
this.accumulatedBits = bits << (32 - this.bitCount);
// This can emit 4 times of:
// 1 byte guaranteed
// 1 extra byte.MinValue byte if previous one was byte.MaxValue
// Thus writing (1 + 1) * 4 = 8 bytes max
// So we must check if emit buffer has extra 8 bytes, if not - call stream.Write
if (this.emitLen > EmitBufferSizeInBytes - 8)
{
this.target.Write(this.emitBuffer, 0, this.emitLen);
this.emitLen = 0;
}
count -= 32;
}
this.accumulatedBits = bits;
this.bitCount = count;
}
/// <summary>
/// Emits the given value with the given Huffman encoder.
/// Emits the given value with the given Huffman table.
/// </summary>
/// <param name="table">Compiled Huffman spec values.</param>
/// <param name="value">The value to encode.</param>
/// <param name="table">Huffman table.</param>
/// <param name="value">Value to encode.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void EmitHuff(int[] table, int value)
{
int x = table[value];
this.Emit(x >> 8, x & 0xff);
}
[MethodImpl(InliningOptions.ShortMethod)]
private void EmitDirectCurrentTerm(int[] table, int value)
{
int a = value;
int b = value;
if (a < 0)
{
a = -value;
b = value - 1;
}
int bt = GetHuffmanEncodingLength((uint)a);
this.EmitHuff(table, bt);
if (bt > 0)
{
this.Emit(b & ((1 << bt) - 1), bt);
}
this.Emit((uint)x & 0xffff_ff00u, x & 0xff);
}
/// <summary>
/// Emits a run of runLength copies of value encoded with the given Huffman encoder.
/// Emits given value via huffman rle encoding.
/// </summary>
/// <param name="table">Compiled Huffman spec values.</param>
/// <param name="runLength">The number of copies to encode.</param>
/// <param name="value">The value to encode.</param>
/// <param name="table">Huffman table.</param>
/// <param name="runLength">The number of preceding zeroes, preshifted by 4 to the left.</param>
/// <param name="value">Value to encode.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void EmitHuffRLE(int[] table, int runLength, int value)
{
DebugGuard.IsTrue((runLength & 0xf) == 0, $"{nameof(runLength)} parameter must be shifted to the left by 4 bits");
int a = value;
int b = value;
if (a < 0)
@ -458,25 +532,18 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
b = value - 1;
}
int bt = GetHuffmanEncodingLength((uint)a);
int valueLen = GetHuffmanEncodingLength((uint)a);
this.EmitHuff(table, (runLength << 4) | bt);
this.Emit(b & ((1 << bt) - 1), bt);
}
// Huffman prefix code
int huffPackage = table[runLength | valueLen];
int prefixLen = huffPackage & 0xff;
uint prefix = (uint)huffPackage & 0xffff_0000u;
/// <summary>
/// Writes remaining bytes from internal buffer to the target stream.
/// </summary>
/// <remarks>Pads last byte with 1's if necessary</remarks>
private void FlushInternalBuffer()
{
// pad last byte with 1's
int padBitsCount = 8 - (this.bitCount % 8);
if (padBitsCount != 0)
{
this.Emit((1 << padBitsCount) - 1, padBitsCount);
this.target.Write(this.emitBuffer, 0, this.emitLen);
}
// Actual encoded value
uint encodedValue = (uint)b << (32 - valueLen);
// Doing two binary shifts to get rid of leading 1's in negative value case
this.Emit(prefix | (encodedValue >> prefixLen), prefixLen + valueLen);
}
/// <summary>
@ -498,7 +565,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
// Lzcnt would return 32 for input value of 0 - no need to check that with branching
// Fallback code if Lzcnt is not supported still use if-check
// But most modern CPUs support this instruction so this should not be a problem
return 32 - System.Numerics.BitOperations.LeadingZeroCount(value);
return 32 - BitOperations.LeadingZeroCount(value);
#else
// Ideally:
// if 0 - return 0 in this case
@ -515,65 +582,108 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
}
/// <summary>
/// Returns index of the last non-zero element in given mcu block.
/// If all values of the mcu block are zero, this method might return different results depending on the runtime and hardware support.
/// This is jpeg mcu specific code, mcu[0] stores a dc value which will be encoded outside of the loop.
/// This method is guaranteed to return either -1 or 0 if all elements are zero.
/// General method for flushing cached spectral data bytes to
/// the output stream respecting stuff bytes.
/// </summary>
/// <remarks>
/// This is an internal operation supposed to be used only in <see cref="HuffmanScanEncoder"/> class for jpeg encoding.
/// Bytes cached via <see cref="Emit"/> are stored in 4-bytes blocks
/// which makes this method endianness dependent.
/// </remarks>
/// <param name="mcu">Mcu block.</param>
/// <returns>Index of the last non-zero element.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
internal static int GetLastValuableElementIndex(ref Block8x8F mcu)
private void FlushToStream(int endIndex)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
const int equalityMask = unchecked((int)0b1111_1111_1111_1111_1111_1111_1111_1111);
Span<byte> emitBytes = MemoryMarshal.AsBytes(this.emitBuffer.AsSpan());
Vector256<int> zero8 = Vector256<int>.Zero;
int writeIdx = 0;
int startIndex = emitBytes.Length - 1;
ref Vector256<float> mcuStride = ref mcu.V0;
for (int i = 7; i >= 0; i--)
// Some platforms may fail to eliminate this if-else branching
// Even if it happens - buffer is flushed in big packs,
// branching overhead shouldn't be noticeable
if (BitConverter.IsLittleEndian)
{
// For little endian case bytes are ordered and can be
// safely written to the stream with stuff bytes
// First byte is cached on the most significant index
// so we are going from the end of the array to its beginning:
// ... [ double word #1 ] [ double word #0 ]
// ... [idx3|idx2|idx1|idx0] [idx3|idx2|idx1|idx0]
for (int i = startIndex; i >= endIndex; i--)
{
int areEqual = Avx2.MoveMask(Avx2.CompareEqual(Avx.ConvertToVector256Int32(Unsafe.Add(ref mcuStride, i)), zero8).AsByte());
byte value = emitBytes[i];
this.streamWriteBuffer[writeIdx++] = value;
// we do not know for sure if this stride contain all non-zero elements or if it has some trailing zeros
if (areEqual != equalityMask)
// Inserting stuff byte
if (value == 0xff)
{
// last index in the stride, we go from the end to the start of the stride
int startIndex = i * 8;
int index = startIndex + 7;
ref float elemRef = ref Unsafe.As<Block8x8F, float>(ref mcu);
while (index >= startIndex && (int)Unsafe.Add(ref elemRef, index) == 0)
{
index--;
}
// this implementation will return -1 if all ac components are zero and dc are zero
return index;
this.streamWriteBuffer[writeIdx++] = 0x00;
}
}
return -1;
}
else
#endif
{
int index = Block8x8F.Size - 1;
ref float elemRef = ref Unsafe.As<Block8x8F, float>(ref mcu);
while (index > 0 && (int)Unsafe.Add(ref elemRef, index) == 0)
// For big endian case bytes are ordered in 4-byte packs
// which are ordered like bytes in the little endian case but in 4-byte packs:
// ... [ double word #1 ] [ double word #0 ]
// ... [idx0|idx1|idx2|idx3] [idx0|idx1|idx2|idx3]
// So we must write each 4-bytes in 'natural order'
for (int i = startIndex; i >= endIndex; i -= 4)
{
index--;
}
// This loop is caused by the nature of underlying byte buffer
// implementation and indeed costs roughly 5% of performance
// compared to little endian scenario
// Even with this performance drop this cached buffer implementation
// is faster than individually writing bytes using binary shifts and binary and(s)
for (int j = i - 3; j <= i; j++)
{
byte value = emitBytes[j];
this.streamWriteBuffer[writeIdx++] = value;
// this implementation will return 0 if all ac components and dc are zero
return index;
// Inserting stuff byte
if (value == 0xff)
{
this.streamWriteBuffer[writeIdx++] = 0x00;
}
}
}
}
this.target.Write(this.streamWriteBuffer, 0, writeIdx);
}
/// <summary>
/// Writes all spectral data words cached so far to the output stream
/// and rewinds the cache so it can be filled again.
/// </summary>
/// <remarks>
/// Intended to be called during the macroblock encoding routine, after
/// all channel blocks of a macroblock were encoded via <see cref="WriteBlock"/>
/// and only when <see cref="IsStreamFlushNeeded"/> is true.
/// </remarks>
private void FlushToStream()
{
    // Cached words are 4 bytes wide: convert the word index to a byte index.
    int firstCachedByteIndex = this.emitWriteIndex * sizeof(uint);
    this.FlushToStream(firstCachedByteIndex);

    // Rewind the write position to the end of the cache.
    this.emitWriteIndex = this.emitBuffer.Length;
}
/// <summary>
/// Flushes the final cached bits to the stream, padding the last
/// incomplete byte with 1's.
/// </summary>
/// <remarks>
/// This must be called only once at the end of the encoding routine.
/// <see cref="IsStreamFlushNeeded"/> check is not needed.
/// </remarks>
[MethodImpl(InliningOptions.ShortMethod)]
private void FlushRemainingBytes()
{
    // Number of bytes of the last word which actually carry encoded bits.
    int valuableBytesCount = (int)Numerics.DivideCeil((uint)this.bitCount, 8);

    // Fill all unused low bits with 1's without corrupting the bits already accumulated.
    uint packedBytes = this.accumulatedBits | (uint.MaxValue >> this.bitCount);
    this.emitBuffer[--this.emitWriteIndex] = packedBytes;

    // FlushToStream(int) walks the byte view from its last index down to the given
    // index inclusive. The final word occupies bytes [firstByte, firstByte + 3] and its
    // first stream byte sits at the highest index, so only the top
    // 'valuableBytesCount' bytes must be included. Going below 'firstByte' would
    // write stale bytes of the neighbouring word after the padded data.
    int lastWordFirstByteIndex = this.emitWriteIndex * sizeof(uint);
    this.FlushToStream(lastWordFirstByteIndex + sizeof(uint) - valuableBytesCount);
}
}
}

161
src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs

@ -0,0 +1,161 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal static partial class FastFloatingPointDCT
{
#pragma warning disable SA1310, SA1311, IDE1006 // naming rules violation warnings
// Broadcast constants for the AVX transforms below.
// Forward DCT (FDCT8x8_Avx) multipliers.
private static readonly Vector256<float> mm256_F_0_7071 = Vector256.Create(0.707106781f);
private static readonly Vector256<float> mm256_F_0_3826 = Vector256.Create(0.382683433f);
private static readonly Vector256<float> mm256_F_0_5411 = Vector256.Create(0.541196100f);
private static readonly Vector256<float> mm256_F_1_3065 = Vector256.Create(1.306562965f);

// Inverse DCT (IDCT8x8_Avx) multipliers; mm256_F_0_5411 above is shared with it.
// Full-precision literal: identical to the C_1_175876 constant declared in the other part
// of this partial class, so both declarations describe the same float value
// (the previous 1.175876f literal rounded to a different float).
private static readonly Vector256<float> mm256_F_1_1758 = Vector256.Create(1.175875602f);
private static readonly Vector256<float> mm256_F_n1_9615 = Vector256.Create(-1.961570560f);
private static readonly Vector256<float> mm256_F_n0_3901 = Vector256.Create(-0.390180644f);
private static readonly Vector256<float> mm256_F_n0_8999 = Vector256.Create(-0.899976223f);
private static readonly Vector256<float> mm256_F_n2_5629 = Vector256.Create(-2.562915447f);
private static readonly Vector256<float> mm256_F_0_2986 = Vector256.Create(0.298631336f);
private static readonly Vector256<float> mm256_F_2_0531 = Vector256.Create(2.053119869f);
private static readonly Vector256<float> mm256_F_3_0727 = Vector256.Create(3.072711026f);
private static readonly Vector256<float> mm256_F_1_5013 = Vector256.Create(1.501321110f);
private static readonly Vector256<float> mm256_F_n1_8477 = Vector256.Create(-1.847759065f);
private static readonly Vector256<float> mm256_F_0_7653 = Vector256.Create(0.765366865f);
#pragma warning restore SA1310, SA1311, IDE1006
/// <summary>
/// Performs the floating point FDCT on the given block in place using simd operations.
/// </summary>
/// <param name="block">Matrix to transform; it is overwritten with the result.</param>
private static void ForwardTransform_Avx(ref Block8x8F block)
{
    DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation.");

    // Two identical passes (transpose, then FDCT8x8_Avx):
    // the first one processes rows, the second one processes columns.
    for (int pass = 0; pass < 2; pass++)
    {
        block.TransposeInplace();
        FDCT8x8_Avx(ref block);
    }
}
/// <summary>
/// Runs a single 1D floating point FDCT pass over an 8x8 matrix in place using AVX operations.
/// </summary>
/// <remarks>
/// Requires Avx support.
/// </remarks>
/// <param name="block">Matrix to transform; it is overwritten with the result.</param>
public static void FDCT8x8_Avx(ref Block8x8F block)
{
    DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation.");

    // Read all eight vectors once; the block is overwritten further down.
    Vector256<float> v0 = block.V0;
    Vector256<float> v1 = block.V1;
    Vector256<float> v2 = block.V2;
    Vector256<float> v3 = block.V3;
    Vector256<float> v4 = block.V4;
    Vector256<float> v5 = block.V5;
    Vector256<float> v6 = block.V6;
    Vector256<float> v7 = block.V7;

    // Sums and differences of the mirrored vector pairs (0,7), (1,6), (2,5), (3,4).
    Vector256<float> sum07 = Avx.Add(v0, v7);
    Vector256<float> diff07 = Avx.Subtract(v0, v7);
    Vector256<float> sum16 = Avx.Add(v1, v6);
    Vector256<float> diff16 = Avx.Subtract(v1, v6);
    Vector256<float> sum25 = Avx.Add(v2, v5);
    Vector256<float> diff25 = Avx.Subtract(v2, v5);
    Vector256<float> sum34 = Avx.Add(v3, v4);
    Vector256<float> diff34 = Avx.Subtract(v3, v4);

    // Even part: built from the pair sums, produces V0, V2, V4 and V6.
    Vector256<float> evenOuterSum = Avx.Add(sum07, sum34);
    Vector256<float> evenOuterDiff = Avx.Subtract(sum07, sum34);
    Vector256<float> evenInnerSum = Avx.Add(sum16, sum25);
    Vector256<float> evenInnerDiff = Avx.Subtract(sum16, sum25);

    block.V0 = Avx.Add(evenOuterSum, evenInnerSum);
    block.V4 = Avx.Subtract(evenOuterSum, evenInnerSum);

    Vector256<float> evenRotated = Avx.Multiply(Avx.Add(evenInnerDiff, evenOuterDiff), mm256_F_0_7071);
    block.V2 = Avx.Add(evenOuterDiff, evenRotated);
    block.V6 = Avx.Subtract(evenOuterDiff, evenRotated);

    // Odd part: built from the pair differences, produces V1, V3, V5 and V7.
    Vector256<float> odd45 = Avx.Add(diff34, diff25);
    Vector256<float> odd56 = Avx.Add(diff25, diff16);
    Vector256<float> odd67 = Avx.Add(diff16, diff07);

    Vector256<float> shared = Avx.Multiply(Avx.Subtract(odd45, odd67), mm256_F_0_3826);
    Vector256<float> termA = SimdUtils.HwIntrinsics.MultiplyAdd(shared, mm256_F_0_5411, odd45);
    Vector256<float> termB = SimdUtils.HwIntrinsics.MultiplyAdd(shared, mm256_F_1_3065, odd67);
    Vector256<float> scaled56 = Avx.Multiply(odd56, mm256_F_0_7071);

    Vector256<float> upper = Avx.Add(diff07, scaled56);
    Vector256<float> lower = Avx.Subtract(diff07, scaled56);

    block.V5 = Avx.Add(lower, termA);
    block.V3 = Avx.Subtract(lower, termA);
    block.V1 = Avx.Add(upper, termB);
    block.V7 = Avx.Subtract(upper, termB);
}
/// <summary>
/// Combined operation of <see cref="IDCT8x4_LeftPart(ref Block8x8F, ref Block8x8F)"/> and <see cref="IDCT8x4_RightPart(ref Block8x8F, ref Block8x8F)"/>
/// using AVX commands.
/// </summary>
/// <remarks>
/// Requires Avx support. Only <paramref name="d"/> is written;
/// <paramref name="s"/> is read but never assigned by this method.
/// </remarks>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void IDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d)
{
    // Same debug-only guard helper as the other AVX methods of this class.
    DebugGuard.IsTrue(Avx.IsSupported, "AVX is required to execute this method");

    // Odd part: combines V1, V3, V5 and V7 of the source into mb0..mb3.
    Vector256<float> my1 = s.V1;
    Vector256<float> my7 = s.V7;
    Vector256<float> mz0 = Avx.Add(my1, my7);
    Vector256<float> my3 = s.V3;
    Vector256<float> mz2 = Avx.Add(my3, my7);
    Vector256<float> my5 = s.V5;
    Vector256<float> mz1 = Avx.Add(my3, my5);
    Vector256<float> mz3 = Avx.Add(my1, my5);
    Vector256<float> mz4 = Avx.Multiply(Avx.Add(mz0, mz1), mm256_F_1_1758);
    mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz2, mm256_F_n1_9615);
    mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz3, mm256_F_n0_3901);
    mz0 = Avx.Multiply(mz0, mm256_F_n0_8999);
    mz1 = Avx.Multiply(mz1, mm256_F_n2_5629);
    Vector256<float> mb3 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my7, mm256_F_0_2986), mz2);
    Vector256<float> mb2 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my5, mm256_F_2_0531), mz3);
    Vector256<float> mb1 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my3, mm256_F_3_0727), mz2);
    Vector256<float> mb0 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my1, mm256_F_1_5013), mz3);

    // Even part: combines V0, V2, V4 and V6 of the source.
    // From here on my0..my3 and mz0..mz4 are reused as scratch variables.
    Vector256<float> my2 = s.V2;
    Vector256<float> my6 = s.V6;
    mz4 = Avx.Multiply(Avx.Add(my2, my6), mm256_F_0_5411);
    Vector256<float> my0 = s.V0;
    Vector256<float> my4 = s.V4;
    mz0 = Avx.Add(my0, my4);
    mz1 = Avx.Subtract(my0, my4);
    mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my6, mm256_F_n1_8477);
    mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my2, mm256_F_0_7653);
    my0 = Avx.Add(mz0, mz3);
    my3 = Avx.Subtract(mz0, mz3);
    my1 = Avx.Add(mz1, mz2);
    my2 = Avx.Subtract(mz1, mz2);

    // Final butterfly: each even term plus/minus its odd term fills the
    // mirrored destination pairs (0,7), (1,6), (2,5) and (3,4).
    d.V0 = Avx.Add(my0, mb0);
    d.V7 = Avx.Subtract(my0, mb0);
    d.V1 = Avx.Add(my1, mb1);
    d.V6 = Avx.Subtract(my1, mb1);
    d.V2 = Avx.Add(my2, mb2);
    d.V5 = Avx.Subtract(my2, mb2);
    d.V3 = Avx.Add(my3, mb3);
    d.V4 = Avx.Subtract(my3, mb3);
}
}
}
#endif

561
src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs

@ -1,11 +1,9 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
@ -19,283 +17,304 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
#pragma warning disable SA1310 // FieldNamesMustNotContainUnderscore
private const float C_1_175876 = 1.175875602f;
private const float C_1_961571 = -1.961570560f;
private const float C_0_390181 = -0.390180644f;
private const float C_0_899976 = -0.899976223f;
private const float C_2_562915 = -2.562915447f;
private const float C_0_298631 = 0.298631336f;
private const float C_2_053120 = 2.053119869f;
private const float C_3_072711 = 3.072711026f;
private const float C_1_501321 = 1.501321110f;
private const float C_0_541196 = 0.541196100f;
private const float C_1_847759 = -1.847759065f;
private const float C_0_765367 = 0.765366865f;
private const float C_0_125 = 0.1250f;
#if SUPPORTS_RUNTIME_INTRINSICS
private static readonly Vector256<float> C_V_0_5411 = Vector256.Create(0.541196f);
private static readonly Vector256<float> C_V_1_3065 = Vector256.Create(1.306563f);
private static readonly Vector256<float> C_V_1_1758 = Vector256.Create(1.175876f);
private static readonly Vector256<float> C_V_0_7856 = Vector256.Create(0.785695f);
private static readonly Vector256<float> C_V_1_3870 = Vector256.Create(1.387040f);
private static readonly Vector256<float> C_V_0_2758 = Vector256.Create(0.275899f);
private static readonly Vector256<float> C_V_n1_9615 = Vector256.Create(-1.961570560f);
private static readonly Vector256<float> C_V_n0_3901 = Vector256.Create(-0.390180644f);
private static readonly Vector256<float> C_V_n0_8999 = Vector256.Create(-0.899976223f);
private static readonly Vector256<float> C_V_n2_5629 = Vector256.Create(-2.562915447f);
private static readonly Vector256<float> C_V_0_2986 = Vector256.Create(0.298631336f);
private static readonly Vector256<float> C_V_2_0531 = Vector256.Create(2.053119869f);
private static readonly Vector256<float> C_V_3_0727 = Vector256.Create(3.072711026f);
private static readonly Vector256<float> C_V_1_5013 = Vector256.Create(1.501321110f);
private static readonly Vector256<float> C_V_n1_8477 = Vector256.Create(-1.847759065f);
private static readonly Vector256<float> C_V_0_7653 = Vector256.Create(0.765366865f);
private static readonly Vector256<float> C_V_InvSqrt2 = Vector256.Create(0.707107f);
#endif
#pragma warning disable SA1311, IDE1006 // naming rules violation warnings
private static readonly Vector4 mm128_F_0_7071 = new Vector4(0.707106781f);
private static readonly Vector4 mm128_F_0_3826 = new Vector4(0.382683433f);
private static readonly Vector4 mm128_F_0_5411 = new Vector4(0.541196100f);
private static readonly Vector4 mm128_F_1_3065 = new Vector4(1.306562965f);
#pragma warning restore SA1311, IDE1006
#pragma warning restore SA1310 // FieldNamesMustNotContainUnderscore
private static readonly Vector4 InvSqrt2 = new Vector4(0.707107f);
/// <summary>
/// Original:
/// <see>
/// <cref>https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L15</cref>
/// </see>
/// Gets reciprocal coefficients for jpeg quantization tables calculation.
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void FDCT8x4_LeftPart(ref Block8x8F s, ref Block8x8F d)
/// <remarks>
/// <para>
/// Current FDCT implementation expects its results to be multiplied by
/// a reciprocal quantization table. To get that 8x8 reciprocal block, the values in this
/// table must be divided by the quantization table values scaled with quality settings.
/// </para>
/// <para>
/// These values were calculated with this formula:
/// <code>
/// value[row * 8 + col] = 1 / (scalefactor[row] * scalefactor[col] * 8);
/// </code>
/// Where:
/// <code>
/// scalefactor[0] = 1
/// </code>
/// <code>
/// scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
/// </code>
/// Values are also scaled by 8 so DCT code won't do extra division/multiplication.
/// </para>
/// </remarks>
internal static readonly float[] DctReciprocalAdjustmentCoefficients = new float[]
{
Vector4 c0 = s.V0L;
Vector4 c1 = s.V7L;
Vector4 t0 = c0 + c1;
Vector4 t7 = c0 - c1;
c1 = s.V6L;
c0 = s.V1L;
Vector4 t1 = c0 + c1;
Vector4 t6 = c0 - c1;
c1 = s.V5L;
c0 = s.V2L;
Vector4 t2 = c0 + c1;
Vector4 t5 = c0 - c1;
c0 = s.V3L;
c1 = s.V4L;
Vector4 t3 = c0 + c1;
Vector4 t4 = c0 - c1;
c0 = t0 + t3;
Vector4 c3 = t0 - t3;
c1 = t1 + t2;
Vector4 c2 = t1 - t2;
d.V0L = c0 + c1;
d.V4L = c0 - c1;
float w0 = 0.541196f;
float w1 = 1.306563f;
d.V2L = (w0 * c2) + (w1 * c3);
d.V6L = (w0 * c3) - (w1 * c2);
w0 = 1.175876f;
w1 = 0.785695f;
c3 = (w0 * t4) + (w1 * t7);
c0 = (w0 * t7) - (w1 * t4);
w0 = 1.387040f;
w1 = 0.275899f;
c2 = (w0 * t5) + (w1 * t6);
c1 = (w0 * t6) - (w1 * t5);
d.V3L = c0 - c2;
d.V5L = c3 - c1;
float invsqrt2 = 0.707107f;
c0 = (c0 + c2) * invsqrt2;
c3 = (c3 + c1) * invsqrt2;
d.V1L = c0 + c3;
d.V7L = c0 - c3;
}
0.125f, 0.09011998f, 0.09567086f, 0.10630376f, 0.125f, 0.15909483f, 0.23096988f, 0.45306373f,
0.09011998f, 0.064972885f, 0.068974845f, 0.07664074f, 0.09011998f, 0.11470097f, 0.16652f, 0.32664075f,
0.09567086f, 0.068974845f, 0.07322331f, 0.081361376f, 0.09567086f, 0.121765904f, 0.17677669f, 0.34675997f,
0.10630376f, 0.07664074f, 0.081361376f, 0.09040392f, 0.10630376f, 0.13529903f, 0.19642374f, 0.38529903f,
0.125f, 0.09011998f, 0.09567086f, 0.10630376f, 0.125f, 0.15909483f, 0.23096988f, 0.45306373f,
0.15909483f, 0.11470097f, 0.121765904f, 0.13529903f, 0.15909483f, 0.2024893f, 0.2939689f, 0.5766407f,
0.23096988f, 0.16652f, 0.17677669f, 0.19642374f, 0.23096988f, 0.2939689f, 0.4267767f, 0.8371526f,
0.45306373f, 0.32664075f, 0.34675997f, 0.38529903f, 0.45306373f, 0.5766407f, 0.8371526f, 1.642134f,
};
/// <summary>
/// Original:
/// <see>
/// <cref>https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L15</cref>
/// </see>
/// Adjusts given quantization table to be compliant with FDCT implementation.
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void FDCT8x4_RightPart(ref Block8x8F s, ref Block8x8F d)
/// <remarks>
/// See <see cref="DctReciprocalAdjustmentCoefficients"/> docs for explanation.
/// </remarks>
/// <param name="quantizationtable">Quantization table to adjust.</param>
public static void AdjustToFDCT(ref Block8x8F quantizationtable)
{
Vector4 c0 = s.V0R;
Vector4 c1 = s.V7R;
Vector4 t0 = c0 + c1;
Vector4 t7 = c0 - c1;
c1 = s.V6R;
c0 = s.V1R;
Vector4 t1 = c0 + c1;
Vector4 t6 = c0 - c1;
c1 = s.V5R;
c0 = s.V2R;
Vector4 t2 = c0 + c1;
Vector4 t5 = c0 - c1;
c0 = s.V3R;
c1 = s.V4R;
Vector4 t3 = c0 + c1;
Vector4 t4 = c0 - c1;
c0 = t0 + t3;
Vector4 c3 = t0 - t3;
c1 = t1 + t2;
Vector4 c2 = t1 - t2;
d.V0R = c0 + c1;
d.V4R = c0 - c1;
float w0 = 0.541196f;
float w1 = 1.306563f;
d.V2R = (w0 * c2) + (w1 * c3);
d.V6R = (w0 * c3) - (w1 * c2);
w0 = 1.175876f;
w1 = 0.785695f;
c3 = (w0 * t4) + (w1 * t7);
c0 = (w0 * t7) - (w1 * t4);
w0 = 1.387040f;
w1 = 0.275899f;
c2 = (w0 * t5) + (w1 * t6);
c1 = (w0 * t6) - (w1 * t5);
d.V3R = c0 - c2;
d.V5R = c3 - c1;
c0 = (c0 + c2) * InvSqrt2;
c3 = (c3 + c1) * InvSqrt2;
d.V1R = c0 + c3;
d.V7R = c0 - c3;
for (int i = 0; i < Block8x8F.Size; i++)
{
quantizationtable[i] = DctReciprocalAdjustmentCoefficients[i] / quantizationtable[i];
}
}
/// <summary>
/// Combined operation of <see cref="FDCT8x4_LeftPart(ref Block8x8F, ref Block8x8F)"/> and <see cref="FDCT8x4_RightPart(ref Block8x8F, ref Block8x8F)"/>
/// using AVX commands.
/// Apply 2D floating point FDCT inplace.
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void FDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d)
/// <param name="block">Input matrix.</param>
public static void TransformFDCT(ref Block8x8F block)
{
#if SUPPORTS_RUNTIME_INTRINSICS
Debug.Assert(Avx.IsSupported, "AVX is required to execute this method");
Vector256<float> t0 = Avx.Add(s.V0, s.V7);
Vector256<float> t7 = Avx.Subtract(s.V0, s.V7);
Vector256<float> t1 = Avx.Add(s.V1, s.V6);
Vector256<float> t6 = Avx.Subtract(s.V1, s.V6);
Vector256<float> t2 = Avx.Add(s.V2, s.V5);
Vector256<float> t5 = Avx.Subtract(s.V2, s.V5);
Vector256<float> t3 = Avx.Add(s.V3, s.V4);
Vector256<float> t4 = Avx.Subtract(s.V3, s.V4);
Vector256<float> c0 = Avx.Add(t0, t3);
Vector256<float> c1 = Avx.Add(t1, t2);
// 0 4
d.V0 = Avx.Add(c0, c1);
d.V4 = Avx.Subtract(c0, c1);
Vector256<float> c3 = Avx.Subtract(t0, t3);
Vector256<float> c2 = Avx.Subtract(t1, t2);
// 2 6
d.V2 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(c2, C_V_0_5411), c3, C_V_1_3065);
d.V6 = SimdUtils.HwIntrinsics.MultiplySubstract(Avx.Multiply(c2, C_V_1_3065), c3, C_V_0_5411);
c3 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(t4, C_V_1_1758), t7, C_V_0_7856);
c0 = SimdUtils.HwIntrinsics.MultiplySubstract(Avx.Multiply(t4, C_V_0_7856), t7, C_V_1_1758);
c2 = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(t5, C_V_1_3870), C_V_0_2758, t6);
c1 = SimdUtils.HwIntrinsics.MultiplySubstract(Avx.Multiply(C_V_0_2758, t5), t6, C_V_1_3870);
// 3 5
d.V3 = Avx.Subtract(c0, c2);
d.V5 = Avx.Subtract(c3, c1);
c0 = Avx.Multiply(Avx.Add(c0, c2), C_V_InvSqrt2);
c3 = Avx.Multiply(Avx.Add(c3, c1), C_V_InvSqrt2);
// 1 7
d.V1 = Avx.Add(c0, c3);
d.V7 = Avx.Subtract(c0, c3);
if (Avx.IsSupported)
{
ForwardTransform_Avx(ref block);
}
else
#endif
if (Vector.IsHardwareAccelerated)
{
ForwardTransform_Vector4(ref block);
}
else
{
ForwardTransform_Scalar(ref block);
}
}
/// <summary>
/// Performs 8x8 matrix Forward Discrete Cosine Transform
/// Apply 2D floating point FDCT inplace using scalar operations.
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void FDCT8x8(ref Block8x8F s, ref Block8x8F d)
/// <remarks>
/// Ported from libjpeg-turbo https://github.com/libjpeg-turbo/libjpeg-turbo/blob/main/jfdctflt.c.
/// </remarks>
/// <param name="block">Input matrix.</param>
private static void ForwardTransform_Scalar(ref Block8x8F block)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
const int dctSize = 8;
float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
float tmp10, tmp11, tmp12, tmp13;
float z1, z2, z3, z4, z5, z11, z13;
// First pass - process rows
ref float dataRef = ref Unsafe.As<Block8x8F, float>(ref block);
for (int ctr = 7; ctr >= 0; ctr--)
{
FDCT8x8_Avx(ref s, ref d);
tmp0 = Unsafe.Add(ref dataRef, 0) + Unsafe.Add(ref dataRef, 7);
tmp7 = Unsafe.Add(ref dataRef, 0) - Unsafe.Add(ref dataRef, 7);
tmp1 = Unsafe.Add(ref dataRef, 1) + Unsafe.Add(ref dataRef, 6);
tmp6 = Unsafe.Add(ref dataRef, 1) - Unsafe.Add(ref dataRef, 6);
tmp2 = Unsafe.Add(ref dataRef, 2) + Unsafe.Add(ref dataRef, 5);
tmp5 = Unsafe.Add(ref dataRef, 2) - Unsafe.Add(ref dataRef, 5);
tmp3 = Unsafe.Add(ref dataRef, 3) + Unsafe.Add(ref dataRef, 4);
tmp4 = Unsafe.Add(ref dataRef, 3) - Unsafe.Add(ref dataRef, 4);
// Even part
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
Unsafe.Add(ref dataRef, 0) = tmp10 + tmp11;
Unsafe.Add(ref dataRef, 4) = tmp10 - tmp11;
z1 = (tmp12 + tmp13) * 0.707106781f;
Unsafe.Add(ref dataRef, 2) = tmp13 + z1;
Unsafe.Add(ref dataRef, 6) = tmp13 - z1;
// Odd part
tmp10 = tmp4 + tmp5;
tmp11 = tmp5 + tmp6;
tmp12 = tmp6 + tmp7;
z5 = (tmp10 - tmp12) * 0.382683433f;
z2 = (0.541196100f * tmp10) + z5;
z4 = (1.306562965f * tmp12) + z5;
z3 = tmp11 * 0.707106781f;
z11 = tmp7 + z3;
z13 = tmp7 - z3;
Unsafe.Add(ref dataRef, 5) = z13 + z2;
Unsafe.Add(ref dataRef, 3) = z13 - z2;
Unsafe.Add(ref dataRef, 1) = z11 + z4;
Unsafe.Add(ref dataRef, 7) = z11 - z4;
dataRef = ref Unsafe.Add(ref dataRef, dctSize);
}
else
#endif
// Second pass - process columns
dataRef = ref Unsafe.As<Block8x8F, float>(ref block);
for (int ctr = 7; ctr >= 0; ctr--)
{
FDCT8x4_LeftPart(ref s, ref d);
FDCT8x4_RightPart(ref s, ref d);
tmp0 = Unsafe.Add(ref dataRef, dctSize * 0) + Unsafe.Add(ref dataRef, dctSize * 7);
tmp7 = Unsafe.Add(ref dataRef, dctSize * 0) - Unsafe.Add(ref dataRef, dctSize * 7);
tmp1 = Unsafe.Add(ref dataRef, dctSize * 1) + Unsafe.Add(ref dataRef, dctSize * 6);
tmp6 = Unsafe.Add(ref dataRef, dctSize * 1) - Unsafe.Add(ref dataRef, dctSize * 6);
tmp2 = Unsafe.Add(ref dataRef, dctSize * 2) + Unsafe.Add(ref dataRef, dctSize * 5);
tmp5 = Unsafe.Add(ref dataRef, dctSize * 2) - Unsafe.Add(ref dataRef, dctSize * 5);
tmp3 = Unsafe.Add(ref dataRef, dctSize * 3) + Unsafe.Add(ref dataRef, dctSize * 4);
tmp4 = Unsafe.Add(ref dataRef, dctSize * 3) - Unsafe.Add(ref dataRef, dctSize * 4);
// Even part
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
Unsafe.Add(ref dataRef, dctSize * 0) = tmp10 + tmp11;
Unsafe.Add(ref dataRef, dctSize * 4) = tmp10 - tmp11;
z1 = (tmp12 + tmp13) * 0.707106781f;
Unsafe.Add(ref dataRef, dctSize * 2) = tmp13 + z1;
Unsafe.Add(ref dataRef, dctSize * 6) = tmp13 - z1;
// Odd part
tmp10 = tmp4 + tmp5;
tmp11 = tmp5 + tmp6;
tmp12 = tmp6 + tmp7;
z5 = (tmp10 - tmp12) * 0.382683433f;
z2 = (0.541196100f * tmp10) + z5;
z4 = (1.306562965f * tmp12) + z5;
z3 = tmp11 * 0.707106781f;
z11 = tmp7 + z3;
z13 = tmp7 - z3;
Unsafe.Add(ref dataRef, dctSize * 5) = z13 + z2;
Unsafe.Add(ref dataRef, dctSize * 3) = z13 - z2;
Unsafe.Add(ref dataRef, dctSize * 1) = z11 + z4;
Unsafe.Add(ref dataRef, dctSize * 7) = z11 - z4;
dataRef = ref Unsafe.Add(ref dataRef, 1);
}
}
/// <summary>
/// Apply floating point FDCT from src into dest
/// Apply floating point FDCT inplace using <see cref="Vector4"/> API.
/// </summary>
/// <param name="src">Source</param>
/// <param name="dest">Destination</param>
/// <param name="temp">Temporary block provided by the caller for optimization</param>
/// <param name="offsetSourceByNeg128">If true, a constant -128.0 offset is applied for all values before FDCT </param>
public static void TransformFDCT(
ref Block8x8F src,
ref Block8x8F dest,
ref Block8x8F temp,
bool offsetSourceByNeg128 = true)
/// <remarks>
/// This implementation must be called only if the hardware supports
/// vectors of 4 floating point numbers. Otherwise the explicit scalar
/// implementation <see cref="ForwardTransform_Scalar"/> is faster
/// because it does not rely on matrix transposition.
/// </remarks>
/// <param name="block">Input matrix.</param>
private static void ForwardTransform_Vector4(ref Block8x8F block)
{
src.TransposeInto(ref temp);
if (offsetSourceByNeg128)
{
temp.AddInPlace(-128F);
}
DebugGuard.IsTrue(Vector.IsHardwareAccelerated, "Scalar implementation should be called for non-accelerated hardware.");
FDCT8x8(ref temp, ref dest);
// First pass - process rows
block.TransposeInplace();
FDCT8x4_Vector4(ref block.V0L);
FDCT8x4_Vector4(ref block.V0R);
dest.TransposeInto(ref temp);
// Second pass - process columns
block.TransposeInplace();
FDCT8x4_Vector4(ref block.V0L);
FDCT8x4_Vector4(ref block.V0R);
}
FDCT8x8(ref temp, ref dest);
/// <summary>
/// Runs a single 1D floating point FDCT pass in place over one 8x4 part of an 8x8 matrix.
/// </summary>
/// <remarks>
/// Implemented using Vector4 API operations for either scalar or sse hardware implementation.
/// Must be called on both 8x4 matrix parts for the full FDCT transform.
/// </remarks>
/// <param name="blockRef">Reference to the first <see cref="Vector4"/> of the 8x4 part to transform.</param>
private static void FDCT8x4_Vector4(ref Vector4 blockRef)
{
    // Consecutive rows of the same 8x4 part are two Vector4 elements apart.
    ref Vector4 row0 = ref blockRef;
    ref Vector4 row1 = ref Unsafe.Add(ref blockRef, 2);
    ref Vector4 row2 = ref Unsafe.Add(ref blockRef, 4);
    ref Vector4 row3 = ref Unsafe.Add(ref blockRef, 6);
    ref Vector4 row4 = ref Unsafe.Add(ref blockRef, 8);
    ref Vector4 row5 = ref Unsafe.Add(ref blockRef, 10);
    ref Vector4 row6 = ref Unsafe.Add(ref blockRef, 12);
    ref Vector4 row7 = ref Unsafe.Add(ref blockRef, 14);

    // Sums and differences of the mirrored row pairs (0,7), (1,6), (2,5), (3,4).
    // All rows are read here, before any of them is overwritten.
    Vector4 sum07 = row0 + row7;
    Vector4 diff07 = row0 - row7;
    Vector4 sum16 = row1 + row6;
    Vector4 diff16 = row1 - row6;
    Vector4 sum25 = row2 + row5;
    Vector4 diff25 = row2 - row5;
    Vector4 sum34 = row3 + row4;
    Vector4 diff34 = row3 - row4;

    // Even part: built from the pair sums, produces rows 0, 2, 4 and 6.
    Vector4 evenOuterSum = sum07 + sum34;
    Vector4 evenOuterDiff = sum07 - sum34;
    Vector4 evenInnerSum = sum16 + sum25;
    Vector4 evenInnerDiff = sum16 - sum25;

    row0 = evenOuterSum + evenInnerSum;
    row4 = evenOuterSum - evenInnerSum;

    Vector4 evenRotated = (evenInnerDiff + evenOuterDiff) * mm128_F_0_7071;
    row2 = evenOuterDiff + evenRotated;
    row6 = evenOuterDiff - evenRotated;

    // Odd part: built from the pair differences, produces rows 1, 3, 5 and 7.
    Vector4 odd45 = diff34 + diff25;
    Vector4 odd56 = diff25 + diff16;
    Vector4 odd67 = diff16 + diff07;

    Vector4 shared = (odd45 - odd67) * mm128_F_0_3826;
    Vector4 termA = (mm128_F_0_5411 * odd45) + shared;
    Vector4 termB = (mm128_F_1_3065 * odd67) + shared;
    Vector4 scaled56 = odd56 * mm128_F_0_7071;

    Vector4 upper = diff07 + scaled56;
    Vector4 lower = diff07 - scaled56;

    row5 = lower + termA;
    row3 = lower - termA;
    row1 = upper + termB;
    row7 = upper - termB;
}
dest.MultiplyInPlace(C_0_125);
/// <summary>
/// Apply floating point IDCT inplace: the result ends up in <paramref name="block"/>.
/// Ported from https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L239.
/// </summary>
/// <param name="block">Input matrix; overwritten with the transformed values.</param>
/// <param name="temp">Matrix to store temporary results; its previous contents are overwritten.</param>
public static void TransformIDCT(ref Block8x8F block, ref Block8x8F temp)
{
// First pass: transpose the input, then run IDCT8x8 from block into temp.
block.TransposeInplace();
IDCT8x8(ref block, ref temp);
// Second pass: transpose the intermediate result, then run IDCT8x8 from temp back into block.
temp.TransposeInplace();
IDCT8x8(ref temp, ref block);
// Scale every value by 1/8 (C_0_125).
// TODO: This can be fused into quantization table step
block.MultiplyInPlace(C_0_125);
}
/// <summary>
@ -303,7 +322,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void IDCT8x8(ref Block8x8F s, ref Block8x8F d)
private static void IDCT8x8(ref Block8x8F s, ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
@ -432,83 +451,5 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
d.V3R = my3 + mb3;
d.V4R = my3 - mb3;
}
/// <summary>
/// Combined operation of <see cref="IDCT8x4_LeftPart(ref Block8x8F, ref Block8x8F)"/> and <see cref="IDCT8x4_RightPart(ref Block8x8F, ref Block8x8F)"/>
/// using AVX commands.
/// </summary>
/// <param name="s">Source</param>
/// <param name="d">Destination</param>
public static void IDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
Debug.Assert(Avx.IsSupported, "AVX is required to execute this method");
Vector256<float> my1 = s.V1;
Vector256<float> my7 = s.V7;
Vector256<float> mz0 = Avx.Add(my1, my7);
Vector256<float> my3 = s.V3;
Vector256<float> mz2 = Avx.Add(my3, my7);
Vector256<float> my5 = s.V5;
Vector256<float> mz1 = Avx.Add(my3, my5);
Vector256<float> mz3 = Avx.Add(my1, my5);
Vector256<float> mz4 = Avx.Multiply(Avx.Add(mz0, mz1), C_V_1_1758);
mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz2, C_V_n1_9615);
mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz3, C_V_n0_3901);
mz0 = Avx.Multiply(mz0, C_V_n0_8999);
mz1 = Avx.Multiply(mz1, C_V_n2_5629);
Vector256<float> mb3 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my7, C_V_0_2986), mz2);
Vector256<float> mb2 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my5, C_V_2_0531), mz3);
Vector256<float> mb1 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my3, C_V_3_0727), mz2);
Vector256<float> mb0 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my1, C_V_1_5013), mz3);
Vector256<float> my2 = s.V2;
Vector256<float> my6 = s.V6;
mz4 = Avx.Multiply(Avx.Add(my2, my6), C_V_0_5411);
Vector256<float> my0 = s.V0;
Vector256<float> my4 = s.V4;
mz0 = Avx.Add(my0, my4);
mz1 = Avx.Subtract(my0, my4);
mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my6, C_V_n1_8477);
mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my2, C_V_0_7653);
my0 = Avx.Add(mz0, mz3);
my3 = Avx.Subtract(mz0, mz3);
my1 = Avx.Add(mz1, mz2);
my2 = Avx.Subtract(mz1, mz2);
d.V0 = Avx.Add(my0, mb0);
d.V7 = Avx.Subtract(my0, mb0);
d.V1 = Avx.Add(my1, mb1);
d.V6 = Avx.Subtract(my1, mb1);
d.V2 = Avx.Add(my2, mb2);
d.V5 = Avx.Subtract(my2, mb2);
d.V3 = Avx.Add(my3, mb3);
d.V4 = Avx.Subtract(my3, mb3);
#endif
}
/// <summary>
/// Apply floating point IDCT transformation into dest, using a temporary block 'temp' provided by the caller (optimization).
/// Ported from https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L239
/// </summary>
/// <param name="src">Source</param>
/// <param name="dest">Destination</param>
/// <param name="temp">Temporary block provided by the caller</param>
public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp)
{
src.TransposeInto(ref temp);
IDCT8x8(ref temp, ref dest);
dest.TransposeInto(ref temp);
IDCT8x8(ref temp, ref dest);
// TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing?
dest.MultiplyInPlace(C_0_125);
}
}
}

67
src/ImageSharp/Formats/Jpeg/Components/Quantization.cs

@ -39,53 +39,59 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
public const int QualityEstimationConfidenceUpperThreshold = 98;
/// <summary>
/// Gets the unscaled luminance quantization table in zig-zag order. Each
/// encoder copies and scales the tables according to its quality parameter.
/// The values are derived from ITU section K.1 after converting from natural to
/// zig-zag order.
/// Gets unscaled luminance quantization table.
/// </summary>
/// <remarks>
/// The values are derived from ITU section K.1.
/// </remarks>
// The C# compiler emits this as a compile-time constant embedded in the PE file.
// This is effectively compiled down to: return new ReadOnlySpan<byte>(&data, length)
// More details can be found: https://github.com/dotnet/roslyn/pull/24621
public static ReadOnlySpan<byte> UnscaledQuant_Luminance => new byte[]
public static ReadOnlySpan<byte> LuminanceTable => new byte[]
{
16, 11, 12, 14, 12, 10, 16, 14, 13, 14, 18, 17, 16, 19, 24,
40, 26, 24, 22, 22, 24, 49, 35, 37, 29, 40, 58, 51, 61, 60,
57, 51, 56, 55, 64, 72, 92, 78, 64, 68, 87, 69, 55, 56, 80,
109, 81, 87, 95, 98, 103, 104, 103, 62, 77, 113, 121, 112,
100, 120, 92, 101, 103, 99,
16, 11, 10, 16, 24, 40, 51, 61,
12, 12, 14, 19, 26, 58, 60, 55,
14, 13, 16, 24, 40, 57, 69, 56,
14, 17, 22, 29, 51, 87, 80, 62,
18, 22, 37, 56, 68, 109, 103, 77,
24, 35, 55, 64, 81, 104, 113, 92,
49, 64, 78, 87, 103, 121, 120, 101,
72, 92, 95, 98, 112, 100, 103, 99,
};
/// <summary>
/// Gets the unscaled chrominance quantization table in zig-zag order. Each
/// encoder copies and scales the tables according to its quality parameter.
/// The values are derived from ITU section K.1 after converting from natural to
/// zig-zag order.
/// Gets unscaled chrominance quantization table.
/// </summary>
/// <remarks>
/// The values are derived from ITU section K.1.
/// </remarks>
// The C# compiler emits this as a compile-time constant embedded in the PE file.
// This is effectively compiled down to: return new ReadOnlySpan<byte>(&data, length)
// More details can be found: https://github.com/dotnet/roslyn/pull/24621
public static ReadOnlySpan<byte> UnscaledQuant_Chrominance => new byte[]
public static ReadOnlySpan<byte> ChrominanceTable => new byte[]
{
17, 18, 18, 24, 21, 24, 47, 26, 26, 47, 99, 66, 56, 66,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
17, 18, 24, 47, 99, 99, 99, 99,
18, 21, 26, 66, 99, 99, 99, 99,
24, 26, 56, 99, 99, 99, 99, 99,
47, 66, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
};
/// Ported from JPEGsnoop:
/// https://github.com/ImpulseAdventure/JPEGsnoop/blob/9732ee0961f100eb69bbff4a0c47438d5997abee/source/JfifDecode.cpp#L4570-L4694
/// <summary>
/// Estimates jpeg quality based on quantization table in zig-zag order.
/// Estimates jpeg quality based on standard quantization table.
/// </summary>
/// <remarks>
/// This technically can be used with any given table but internal decoder code uses ITU spec tables:
/// <see cref="UnscaledQuant_Luminance"/> and <see cref="UnscaledQuant_Chrominance"/>.
/// Technically, this can be used with any given table but internal decoder code uses ITU spec tables:
/// <see cref="LuminanceTable"/> and <see cref="ChrominanceTable"/>.
/// </remarks>
/// <param name="table">Input quantization table.</param>
/// <param name="target">Quantization to estimate against.</param>
/// <returns>Estimated quality</returns>
/// <param name="target">Natural order quantization table to estimate against.</param>
/// <returns>Estimated quality.</returns>
public static int EstimateQuality(ref Block8x8F table, ReadOnlySpan<byte> target)
{
// This method can be SIMD'ified if standard table is injected as Block8x8F.
@ -106,11 +112,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
int quality;
for (int i = 0; i < Block8x8F.Size; i++)
{
float coeff = table[i];
int coeffInteger = (int)coeff;
int coeff = (int)table[i];
// Coefficients are actually int16 casted to float numbers so there's no truncating error.
if (coeffInteger != 0)
if (coeff != 0)
{
comparePercent = 100.0 * (table[i] / target[i]);
}
@ -152,7 +157,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <returns>Estimated quality</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int EstimateLuminanceQuality(ref Block8x8F luminanceTable)
=> EstimateQuality(ref luminanceTable, UnscaledQuant_Luminance);
=> EstimateQuality(ref luminanceTable, LuminanceTable);
/// <summary>
/// Estimates jpeg quality based on quantization table in zig-zag order.
@ -161,7 +166,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// <returns>Estimated quality</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int EstimateChrominanceQuality(ref Block8x8F chrominanceTable)
=> EstimateQuality(ref chrominanceTable, UnscaledQuant_Chrominance);
=> EstimateQuality(ref chrominanceTable, ChrominanceTable);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int QualityToScale(int quality)
@ -185,10 +190,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Block8x8F ScaleLuminanceTable(int quality)
=> ScaleQuantizationTable(scale: QualityToScale(quality), UnscaledQuant_Luminance);
=> ScaleQuantizationTable(scale: QualityToScale(quality), LuminanceTable);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Block8x8F ScaleChrominanceTable(int quality)
=> ScaleQuantizationTable(scale: QualityToScale(quality), UnscaledQuant_Chrominance);
=> ScaleQuantizationTable(scale: QualityToScale(quality), ChrominanceTable);
}
}

300
src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs

@ -0,0 +1,300 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
#if SUPPORTS_RUNTIME_INTRINSICS
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal static partial class ZigZag
{
#pragma warning disable SA1309 // naming rules violation warnings
/// <summary>
/// Special byte value to zero out elements during Sse/Avx shuffle intrinsics.
/// </summary>
/// <remarks>
/// Used as a placeholder entry inside <see cref="SseShuffleMasks"/> and <see cref="AvxShuffleMasks"/>.
/// NOTE(review): the single-underscore name is presumably chosen so that zeroed positions stand out
/// from real byte indices in those tables; SA1309 is suppressed only for this declaration.
/// </remarks>
private const byte _ = 0xff;
#pragma warning restore SA1309
/// <summary>
/// Gets shuffle vectors for <see cref="ApplyZigZagOrderingSsse3"/>
/// zig zag implementation.
/// </summary>
/// <remarks>
/// <para>
/// Every line of the table is one 16-byte shuffle mask. <see cref="ApplyZigZagOrderingSsse3"/> loads
/// mask number <c>n</c> from byte offset <c>16 * n</c>, so the order of the lines must not be changed.
/// </para>
/// <para>
/// Each mask is applied to a single input row; a mask byte is the index of the source byte that is
/// copied to that output position, and <c>_</c> marks output bytes which are zeroed. Elements are
/// 16 bit wide, hence indices always come in pairs (low byte, high byte).
/// </para>
/// <para>
/// Masks are grouped by the output row they help to build. The last mask of the row3 group is
/// applied to two different input rows: once for output row 3 and once more for output row 4.
/// </para>
/// </remarks>
private static ReadOnlySpan<byte> SseShuffleMasks => new byte[]
{
// row0
0, 1, 2, 3, _, _, _, _, _, _, 4, 5, 6, 7, _, _,
_, _, _, _, 0, 1, _, _, 2, 3, _, _, _, _, 4, 5,
_, _, _, _, _, _, 0, 1, _, _, _, _, _, _, _, _,
// row1
_, _, _, _, _, _, _, _, _, _, _, _, 8, 9, 10, 11,
2, 3, _, _, _, _, _, _, 4, 5, _, _, _, _, _, _,
_, _, 0, 1, _, _, 2, 3, _, _, _, _, _, _, _, _,
// row2
_, _, _, _, _, _, 2, 3, _, _, _, _, _, _, 4, 5,
_, _, _, _, _, _, _, _, 0, 1, _, _, 2, 3, _, _,
// row3
_, _, _, _, _, _, 12, 13, 14, 15, _, _, _, _, _, _,
_, _, _, _, 10, 11, _, _, _, _, 12, 13, _, _, _, _,
_, _, 8, 9, _, _, _, _, _, _, _, _, 10, 11, _, _,
6, 7, _, _, _, _, _, _, _, _, _, _, _, _, 8, 9,
// row4
_, _, 4, 5, _, _, _, _, _, _, _, _, 6, 7, _, _,
_, _, _, _, 2, 3, _, _, _, _, 4, 5, _, _, _, _,
_, _, _, _, _, _, 0, 1, 2, 3, _, _, _, _, _, _,
// row5
_, _, 12, 13, _, _, 14, 15, _, _, _, _, _, _, _, _,
10, 11, _, _, _, _, _, _, 12, 13, _, _, _, _, _, _,
// row6
_, _, _, _, _, _, _, _, 12, 13, _, _, 14, 15, _, _,
_, _, _, _, _, _, 10, 11, _, _, _, _, _, _, 12, 13,
4, 5, 6, 7, _, _, _, _, _, _, _, _, _, _, _, _,
// row7
10, 11, _, _, _, _, 12, 13, _, _, 14, 15, _, _, _, _,
_, _, 8, 9, 10, 11, _, _, _, _, _, _, 12, 13, 14, 15
};
/// <summary>
/// Gets shuffle vectors for <see cref="ApplyZigZagOrderingAvx2"/>
/// zig zag implementation.
/// </summary>
/// <remarks>
/// <para>
/// Every line of the table is one 32-byte mask. <see cref="ApplyZigZagOrderingAvx2"/> loads
/// mask number <c>n</c> from byte offset <c>n * 32</c>, so the order of the lines must not be changed.
/// </para>
/// <para>
/// Masks commented as "cross-lane" are reinterpreted as eight 32-bit integers and passed as the
/// index vector to <c>Avx2.PermuteVar8x32</c>; that is why every index is spelled as four bytes
/// <c>n, 0, 0, 0</c>. Masks commented as "inner-lane" are plain byte masks passed to <c>Avx2.Shuffle</c>,
/// where <c>_</c> marks output bytes which are zeroed.
/// </para>
/// <para>
/// The label in front of each mask names the output row pair and the source row pair it is used for,
/// e.g. <c>01_AB</c> is the contribution of source rows A and B to output rows 0 and 1.
/// NOTE(review): the <c>_, _, _, _</c> groups inside cross-lane masks appear to mark 32-bit slots
/// whose permuted value is never read by the following inner-lane shuffle - confirm before editing.
/// </para>
/// </remarks>
private static ReadOnlySpan<byte> AvxShuffleMasks => new byte[]
{
// 01_AB/01_EF/23_CD - cross-lane
0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0,
// 01_AB - inner-lane
0, 1, 2, 3, 8, 9, _, _, 10, 11, 4, 5, 6, 7, 12, 13, _, _, _, _, _, _, _, _, _, _, 10, 11, 4, 5, 6, 7,
// 01_CD/23_GH - cross-lane
0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, _, _, _, _, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, _, _, _, _,
// 01_CD - inner-lane
_, _, _, _, _, _, 0, 1, _, _, _, _, _, _, _, _, 2, 3, 8, 9, _, _, 10, 11, 4, 5, _, _, _, _, _, _,
// 01_EF - inner-lane
_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 0, 1, _, _, _, _, _, _, _, _, _, _,
// 23_AB/45_CD/67_EF - cross-lane
3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, _, _, _, _, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, _, _, _, _,
// 23_AB - inner-lane
4, 5, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 6, 7, 0, 1, 2, 3, 8, 9, _, _, _, _,
// 23_CD - inner-lane
_, _, 6, 7, 12, 13, _, _, _, _, _, _, _, _, _, _, 10, 11, 4, 5, _, _, _, _, _, _, _, _, 6, 7, 12, 13,
// 23_EF - inner-lane
_, _, _, _, _, _, 2, 3, 8, 9, _, _, 10, 11, 4, 5, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _,
// 23_GH - inner-lane
_, _, _, _, _, _, _, _, _, _, 0, 1, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _,
// 45_AB - inner-lane
_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 10, 11, _, _, _, _, _, _, _, _, _, _,
// 45_CD - inner-lane
_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 6, 7, 0, 1, _, _, 2, 3, 8, 9, _, _, _, _, _, _,
// 45_EF - cross-lane
1, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, _, _, _, _, 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0,
// 45_EF - inner-lane
2, 3, 8, 9, _, _, _, _, _, _, _, _, 10, 11, 4, 5, _, _, _, _, _, _, _, _, _, _, 2, 3, 8, 9, _, _,
// 45_GH - inner-lane
_, _, _, _, 2, 3, 8, 9, 10, 11, 4, 5, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 6, 7,
// 67_CD - inner-lane
_, _, _, _, _, _, _, _, _, _, 10, 11, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _,
// 67_EF - inner-lane
_, _, _, _, _, _, 6, 7, 0, 1, _, _, 2, 3, 8, 9, _, _, _, _, _, _, _, _, 10, 11, _, _, _, _, _, _,
// 67_GH - inner-lane
8, 9, 10, 11, 4, 5, _, _, _, _, _, _, _, _, _, _, 2, 3, 8, 9, 10, 11, 4, 5, _, _, 6, 7, 12, 13, 14, 15
};
/// <summary>
/// Applies zig zag ordering for given 8x8 matrix using SSSE3 cpu intrinsics.
/// </summary>
/// <remarks>
/// <para>
/// The block is reordered in place: all eight input rows are read into locals first and the
/// eight output rows are written back only at the very end.
/// </para>
/// <para>
/// In the comments below input rows are named A..H (top to bottom) and the digit is the element
/// index inside that row, e.g. <c>B3</c> is the fourth element of the second row.
/// Every output row is assembled by shuffling each contributing input row with a mask from
/// <see cref="SseShuffleMasks"/> (non-selected elements become zero) and OR-ing the results;
/// input rows that contribute only a single element are patched in with extract/insert instead.
/// </para>
/// </remarks>
/// <param name="block">Input matrix.</param>
public static unsafe void ApplyZigZagOrderingSsse3(ref Block8x8 block)
{
    DebugGuard.IsTrue(Ssse3.IsSupported, "Ssse3 support is required to run this operation!");

    fixed (byte* maskPtr = SseShuffleMasks)
    {
        Vector128<byte> rowA = block.V0.AsByte();
        Vector128<byte> rowB = block.V1.AsByte();
        Vector128<byte> rowC = block.V2.AsByte();
        Vector128<byte> rowD = block.V3.AsByte();
        Vector128<byte> rowE = block.V4.AsByte();
        Vector128<byte> rowF = block.V5.AsByte();
        Vector128<byte> rowG = block.V6.AsByte();
        Vector128<byte> rowH = block.V7.AsByte();

        // row0 - A0 A1 B0 C0 B1 A2 A3 B2
        Vector128<short> rowA0 = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (16 * 0))).AsInt16();
        Vector128<short> rowB0 = Ssse3.Shuffle(rowB, Sse2.LoadVector128(maskPtr + (16 * 1))).AsInt16();
        Vector128<short> row0 = Sse2.Or(rowA0, rowB0);
        Vector128<short> rowC0 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 2))).AsInt16();
        row0 = Sse2.Or(row0, rowC0);

        // row1 - C1 D0 E0 D1 C2 B3 A4 A5
        Vector128<short> rowA1 = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (16 * 3))).AsInt16();
        Vector128<short> rowC1 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 4))).AsInt16();
        Vector128<short> row1 = Sse2.Or(rowA1, rowC1);
        Vector128<short> rowD1 = Ssse3.Shuffle(rowD, Sse2.LoadVector128(maskPtr + (16 * 5))).AsInt16();
        row1 = Sse2.Or(row1, rowD1);
        row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowB.AsUInt16(), 3), 5).AsInt16();
        row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 0), 2).AsInt16();

        // row2 - B4 C3 D2 E1 F0 G0 F1 E2
        Vector128<short> rowE2 = Ssse3.Shuffle(rowE, Sse2.LoadVector128(maskPtr + (16 * 6))).AsInt16();
        Vector128<short> rowF2 = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (16 * 7))).AsInt16();
        Vector128<short> row2 = Sse2.Or(rowE2, rowF2);
        row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowB.AsUInt16(), 4), 0).AsInt16();
        row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowC.AsUInt16(), 3), 1).AsInt16();
        row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 2), 2).AsInt16();
        row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowG.AsUInt16(), 0), 5).AsInt16();

        // row3 - D3 C4 B5 A6 A7 B6 C5 D4
        Vector128<short> rowA3 = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (16 * 8))).AsInt16();
        Vector128<short> rowB3 = Ssse3.Shuffle(rowB, Sse2.LoadVector128(maskPtr + (16 * 9))).AsInt16();
        Vector128<short> row3 = Sse2.Or(rowA3, rowB3);
        Vector128<short> rowC3 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 10))).AsInt16();
        row3 = Sse2.Or(row3, rowC3);

        // This mask is shared: it places D3/D4 into row3 and E3/E4 into row4.
        Vector128<byte> shuffleRowD3EF = Sse2.LoadVector128(maskPtr + (16 * 11));
        Vector128<short> rowD3 = Ssse3.Shuffle(rowD, shuffleRowD3EF).AsInt16();
        row3 = Sse2.Or(row3, rowD3);

        // row4 - E3 F2 G1 H0 H1 G2 F3 E4
        Vector128<short> rowE4 = Ssse3.Shuffle(rowE, shuffleRowD3EF).AsInt16();
        Vector128<short> rowF4 = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (16 * 12))).AsInt16();
        Vector128<short> row4 = Sse2.Or(rowE4, rowF4);
        Vector128<short> rowG4 = Ssse3.Shuffle(rowG, Sse2.LoadVector128(maskPtr + (16 * 13))).AsInt16();
        row4 = Sse2.Or(row4, rowG4);
        Vector128<short> rowH4 = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (16 * 14))).AsInt16();
        row4 = Sse2.Or(row4, rowH4);

        // row5 - D5 C6 B7 C7 D6 E5 F4 G3
        Vector128<short> rowC5 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 15))).AsInt16();
        Vector128<short> rowD5 = Ssse3.Shuffle(rowD, Sse2.LoadVector128(maskPtr + (16 * 16))).AsInt16();
        Vector128<short> row5 = Sse2.Or(rowC5, rowD5);
        row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowB.AsUInt16(), 7), 2).AsInt16();
        row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 5), 5).AsInt16();
        row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowF.AsUInt16(), 4), 6).AsInt16();
        row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowG.AsUInt16(), 3), 7).AsInt16();

        // row6 - H2 H3 G4 F5 E6 D7 E7 F6
        Vector128<short> rowE6 = Ssse3.Shuffle(rowE, Sse2.LoadVector128(maskPtr + (16 * 17))).AsInt16();
        Vector128<short> rowF6 = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (16 * 18))).AsInt16();
        Vector128<short> row6 = Sse2.Or(rowE6, rowF6);
        Vector128<short> rowH6 = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (16 * 19))).AsInt16();
        row6 = Sse2.Or(row6, rowH6);
        row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 7), 5).AsInt16();
        row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowG.AsUInt16(), 4), 2).AsInt16();

        // row7 - G5 H4 H5 G6 F7 G7 H6 H7
        Vector128<short> rowG7 = Ssse3.Shuffle(rowG, Sse2.LoadVector128(maskPtr + (16 * 20))).AsInt16();
        Vector128<short> rowH7 = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (16 * 21))).AsInt16();
        Vector128<short> row7 = Sse2.Or(rowG7, rowH7);
        row7 = Sse2.Insert(row7.AsUInt16(), Sse2.Extract(rowF.AsUInt16(), 7), 4).AsInt16();

        // Write back only after every input row has been consumed.
        block.V0 = row0;
        block.V1 = row1;
        block.V2 = row2;
        block.V3 = row3;
        block.V4 = row4;
        block.V5 = row5;
        block.V6 = row6;
        block.V7 = row7;
    }
}
/// <summary>
/// Applies zig zag ordering for given 8x8 matrix using AVX2 cpu intrinsics.
/// </summary>
/// <remarks>
/// <para>
/// The block is reordered in place: the four input row pairs are read into locals first and the
/// four output row pairs are written back only at the very end.
/// </para>
/// <para>
/// Input rows are named A..H and output rows 0..7. A local called <c>rowNM_XY</c> holds the
/// contribution of input rows X and Y to output rows N and M; the final <c>rowNM</c> is the OR of
/// all its contributions. Each contribution is produced in two steps: a 32-bit permute
/// (<c>Avx2.PermuteVar8x32</c>) followed by a byte shuffle (<c>Avx2.Shuffle</c>), both driven by
/// masks from <see cref="AvxShuffleMasks"/>. Permuted intermediates with four digits in their name
/// (e.g. <c>row0123_EF</c>) are reused for two output row pairs.
/// </para>
/// </remarks>
/// <param name="block">Input matrix.</param>
public static unsafe void ApplyZigZagOrderingAvx2(ref Block8x8 block)
{
    DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");

    fixed (byte* shuffleVectorsPtr = AvxShuffleMasks)
    {
        Vector256<byte> rowsAB = block.V01.AsByte();
        Vector256<byte> rowsCD = block.V23.AsByte();
        Vector256<byte> rowsEF = block.V45.AsByte();
        Vector256<byte> rowsGH = block.V67.AsByte();

        // rows 0 1
        Vector256<int> rows_AB01_EF01_CD23_shuffleMask = Avx.LoadVector256(shuffleVectorsPtr + (0 * 32)).AsInt32();
        Vector256<byte> row01_AB = Avx2.PermuteVar8x32(rowsAB.AsInt32(), rows_AB01_EF01_CD23_shuffleMask).AsByte();
        row01_AB = Avx2.Shuffle(row01_AB, Avx.LoadVector256(shuffleVectorsPtr + (1 * 32)));

        Vector256<int> rows_CD01_GH23_shuffleMask = Avx.LoadVector256(shuffleVectorsPtr + (2 * 32)).AsInt32();
        Vector256<byte> row01_CD = Avx2.PermuteVar8x32(rowsCD.AsInt32(), rows_CD01_GH23_shuffleMask).AsByte();
        row01_CD = Avx2.Shuffle(row01_CD, Avx.LoadVector256(shuffleVectorsPtr + (3 * 32)));

        Vector256<byte> row0123_EF = Avx2.PermuteVar8x32(rowsEF.AsInt32(), rows_AB01_EF01_CD23_shuffleMask).AsByte();
        Vector256<byte> row01_EF = Avx2.Shuffle(row0123_EF, Avx.LoadVector256(shuffleVectorsPtr + (4 * 32)));

        Vector256<byte> row01 = Avx2.Or(Avx2.Or(row01_AB, row01_CD), row01_EF);

        // rows 2 3
        Vector256<int> rows_AB23_CD45_EF67_shuffleMask = Avx.LoadVector256(shuffleVectorsPtr + (5 * 32)).AsInt32();
        Vector256<byte> row2345_AB = Avx2.PermuteVar8x32(rowsAB.AsInt32(), rows_AB23_CD45_EF67_shuffleMask).AsByte();
        Vector256<byte> row23_AB = Avx2.Shuffle(row2345_AB, Avx.LoadVector256(shuffleVectorsPtr + (6 * 32)));

        Vector256<byte> row23_CD = Avx2.PermuteVar8x32(rowsCD.AsInt32(), rows_AB01_EF01_CD23_shuffleMask).AsByte();
        row23_CD = Avx2.Shuffle(row23_CD, Avx.LoadVector256(shuffleVectorsPtr + (7 * 32)));

        Vector256<byte> row23_EF = Avx2.Shuffle(row0123_EF, Avx.LoadVector256(shuffleVectorsPtr + (8 * 32)));

        Vector256<byte> row2345_GH = Avx2.PermuteVar8x32(rowsGH.AsInt32(), rows_CD01_GH23_shuffleMask).AsByte();
        Vector256<byte> row23_GH = Avx2.Shuffle(row2345_GH, Avx.LoadVector256(shuffleVectorsPtr + (9 * 32)));

        Vector256<byte> row23 = Avx2.Or(Avx2.Or(row23_AB, row23_CD), Avx2.Or(row23_EF, row23_GH));

        // rows 4 5
        Vector256<byte> row45_AB = Avx2.Shuffle(row2345_AB, Avx.LoadVector256(shuffleVectorsPtr + (10 * 32)));

        Vector256<byte> row4567_CD = Avx2.PermuteVar8x32(rowsCD.AsInt32(), rows_AB23_CD45_EF67_shuffleMask).AsByte();
        Vector256<byte> row45_CD = Avx2.Shuffle(row4567_CD, Avx.LoadVector256(shuffleVectorsPtr + (11 * 32)));

        Vector256<int> rows_EF45_GH67_shuffleMask = Avx.LoadVector256(shuffleVectorsPtr + (12 * 32)).AsInt32();
        Vector256<byte> row45_EF = Avx2.PermuteVar8x32(rowsEF.AsInt32(), rows_EF45_GH67_shuffleMask).AsByte();
        row45_EF = Avx2.Shuffle(row45_EF, Avx.LoadVector256(shuffleVectorsPtr + (13 * 32)));

        Vector256<byte> row45_GH = Avx2.Shuffle(row2345_GH, Avx.LoadVector256(shuffleVectorsPtr + (14 * 32)));

        Vector256<byte> row45 = Avx2.Or(Avx2.Or(row45_AB, row45_CD), Avx2.Or(row45_EF, row45_GH));

        // rows 6 7
        Vector256<byte> row67_CD = Avx2.Shuffle(row4567_CD, Avx.LoadVector256(shuffleVectorsPtr + (15 * 32)));

        Vector256<byte> row67_EF = Avx2.PermuteVar8x32(rowsEF.AsInt32(), rows_AB23_CD45_EF67_shuffleMask).AsByte();
        row67_EF = Avx2.Shuffle(row67_EF, Avx.LoadVector256(shuffleVectorsPtr + (16 * 32)));

        Vector256<byte> row67_GH = Avx2.PermuteVar8x32(rowsGH.AsInt32(), rows_EF45_GH67_shuffleMask).AsByte();
        row67_GH = Avx2.Shuffle(row67_GH, Avx.LoadVector256(shuffleVectorsPtr + (17 * 32)));

        Vector256<byte> row67 = Avx2.Or(Avx2.Or(row67_CD, row67_EF), row67_GH);

        // Write back only after every input row pair has been consumed.
        block.V01 = row01.AsInt16();
        block.V23 = row23.AsInt16();
        block.V45 = row45.AsInt16();
        block.V67 = row67.AsInt16();
    }
}
}
}
#endif

79
src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs

@ -2,21 +2,15 @@
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
/// <summary>
/// Holds the Jpeg UnZig array in a value/stack type.
/// Unzig maps from the zigzag ordering to the natural ordering. For example,
/// unzig[3] is the column and row of the fourth element in zigzag order. The
/// value is 16, which means first column (16%8 == 0) and third row (16/8 == 2).
/// </summary>
[StructLayout(LayoutKind.Sequential)]
internal unsafe struct ZigZag
internal static partial class ZigZag
{
/// <summary>
/// Gets span of zig-zag ordering indices.
/// </summary>
/// <remarks>
/// When reading corrupted data, the Huffman decoders could attempt
/// to reference an entry beyond the end of this array (if the decoded
/// zero run length reaches past the end of the block). To prevent
@ -25,20 +19,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
/// to be stored in location 63 of the block, not somewhere random.
/// The worst case would be a run-length of 15, which means we need 16
/// fake entries.
/// </summary>
private const int Size = 64 + 16;
/// <summary>
/// Copy of <see cref="Unzig"/> in a value type
/// </summary>
public fixed byte Data[Size];
/// <summary>
/// Gets the unzigs map, which maps from the zigzag ordering to the natural ordering.
/// For example, unzig[3] is the column and row of the fourth element in zigzag order.
/// The value is 16, which means first column (16%8 == 0) and third row (16/8 == 2).
/// </summary>
private static ReadOnlySpan<byte> Unzig => new byte[]
/// </remarks>
public static ReadOnlySpan<byte> ZigZagOrder => new byte[]
{
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
@ -48,53 +30,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46,
53, 60, 61, 54, 47, 55, 62, 63,
63, 63, 63, 63, 63, 63, 63, 63, // Extra entries for safety in decoder
// Extra entries for safety in decoder
63, 63, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63
};
/// <summary>
/// Returns the value at the given index
/// </summary>
/// <param name="idx">The index</param>
/// <returns>The <see cref="byte"/></returns>
public byte this[int idx]
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
ref byte self = ref Unsafe.As<ZigZag, byte>(ref this);
return Unsafe.Add(ref self, idx);
}
}
/// <summary>
/// Creates and fills an instance of <see cref="ZigZag"/> with Jpeg unzig indices
/// </summary>
/// <returns>The new instance</returns>
public static ZigZag CreateUnzigTable()
{
ZigZag result = default;
ref byte sourceRef = ref MemoryMarshal.GetReference(Unzig);
ref byte destinationRef = ref Unsafe.AsRef<byte>(result.Data);
Unzig.CopyTo(new Span<byte>(result.Data, Size));
return result;
}
/// <summary>
/// Apply Zigging to the given quantization table, so it will be sufficient to multiply blocks for dequantizing them.
/// </summary>
public static Block8x8F CreateDequantizationTable(ref Block8x8F qt)
{
Block8x8F result = default;
for (int i = 0; i < Block8x8F.Size; i++)
{
result[Unzig[i]] = qt[i];
}
return result;
}
}
}

10
src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs

@ -887,9 +887,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
stream.Read(this.temp, 0, 64);
remaining -= 64;
// Parsing quantization table & saving it in natural order
for (int j = 0; j < 64; j++)
{
table[j] = this.temp[j];
table[ZigZag.ZigZagOrder[j]] = this.temp[j];
}
break;
@ -907,9 +908,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
stream.Read(this.temp, 0, 128);
remaining -= 128;
// Parsing quantization table & saving it in natural order
for (int j = 0; j < 64; j++)
{
table[j] = (this.temp[2 * j] << 8) | this.temp[(2 * j) + 1];
table[ZigZag.ZigZagOrder[j]] = (this.temp[2 * j] << 8) | this.temp[(2 * j) + 1];
}
break;
@ -1069,13 +1071,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
// Types 0..1 DC..AC
if (tableType > 1)
{
JpegThrowHelper.ThrowInvalidImageContentException("Bad Huffman Table type.");
JpegThrowHelper.ThrowInvalidImageContentException($"Bad huffman table type: {tableType}");
}
// Max tables of each type
if (tableIndex > 3)
{
JpegThrowHelper.ThrowInvalidImageContentException("Bad Huffman Table index.");
JpegThrowHelper.ThrowInvalidImageContentException($"Bad huffman table index: {tableIndex}");
}
stream.Read(huffmanDataSpan, 0, 16);

51
src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs

@ -131,28 +131,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
this.WriteStartOfScan(componentCount, componentIds);
// Write the scan compressed data.
var scanEncoder = new HuffmanScanEncoder(stream);
if (this.colorType == JpegColorType.Luminance)
{
// luminance quantization table only.
scanEncoder.EncodeGrayscale(image, ref luminanceQuantTable, cancellationToken);
}
else
{
// luminance and chrominance quantization tables.
switch (this.colorType)
{
case JpegColorType.YCbCrRatio444:
case JpegColorType.Luminance:
scanEncoder.Encode444(image, ref luminanceQuantTable, ref chrominanceQuantTable, cancellationToken);
break;
case JpegColorType.YCbCrRatio420:
scanEncoder.Encode420(image, ref luminanceQuantTable, ref chrominanceQuantTable, cancellationToken);
break;
case JpegColorType.Rgb:
scanEncoder.EncodeRgb(image, ref luminanceQuantTable, cancellationToken);
break;
}
switch (this.colorType)
{
case JpegColorType.YCbCrRatio444:
new HuffmanScanEncoder(3, stream).Encode444(image, ref luminanceQuantTable, ref chrominanceQuantTable, cancellationToken);
break;
case JpegColorType.YCbCrRatio420:
new HuffmanScanEncoder(6, stream).Encode420(image, ref luminanceQuantTable, ref chrominanceQuantTable, cancellationToken);
break;
case JpegColorType.Luminance:
new HuffmanScanEncoder(1, stream).EncodeGrayscale(image, ref luminanceQuantTable, cancellationToken);
break;
case JpegColorType.Rgb:
new HuffmanScanEncoder(3, stream).EncodeRgb(image, ref luminanceQuantTable, cancellationToken);
break;
default:
// all other non-supported color types are checked at the start of this method
break;
}
// Write the End Of Image marker.
@ -193,7 +188,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
dqt[offset++] = (byte)i;
for (int j = 0; j < Block8x8F.Size; j++)
{
dqt[offset++] = (byte)quant[j];
dqt[offset++] = (byte)quant[ZigZag.ZigZagOrder[j]];
}
}
@ -735,11 +730,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
/// Initializes quantization tables.
/// </summary>
/// <remarks>
/// <para>
/// Zig-zag ordering is NOT applied to the resulting tables.
/// </para>
/// <para>
/// We take quality values in a hierarchical order:
/// 1. Check if encoder has set quality
/// 2. Check if metadata has special table for encoding
/// 3. Check if metadata has set quality
/// 4. Take default quality value - 75
/// 2. Check if metadata has set quality
/// 3. Take default quality value - 75
/// </para>
/// </remarks>
/// <param name="componentCount">Color components count.</param>
/// <param name="metadata">Jpeg metadata instance.</param>

11
src/ImageSharp/Formats/Tiff/Compression/Decompressors/JpegTiffCompression.cs

@ -65,22 +65,21 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Decompressors
scanDecoder.ResetInterval = 0;
jpegDecoder.ParseStream(stream, scanDecoder, CancellationToken.None);
using var image = new Image<Rgb24>(this.configuration, spectralConverter.PixelBuffer, new ImageMetadata());
CopyImageBytesToBuffer(buffer, image);
CopyImageBytesToBuffer(buffer, spectralConverter.PixelBuffer);
}
else
{
using var image = Image.Load<Rgb24>(stream);
CopyImageBytesToBuffer(buffer, image);
CopyImageBytesToBuffer(buffer, image.Frames.RootFrame.PixelBuffer);
}
}
private static void CopyImageBytesToBuffer(Span<byte> buffer, Image<Rgb24> image)
private static void CopyImageBytesToBuffer(Span<byte> buffer, Buffer2D<Rgb24> pixelBuffer)
{
int offset = 0;
for (int y = 0; y < image.Height; y++)
for (int y = 0; y < pixelBuffer.Height; y++)
{
Span<Rgb24> pixelRowSpan = image.GetPixelRowSpan(y);
Span<Rgb24> pixelRowSpan = pixelBuffer.GetRowSpan(y);
Span<byte> rgbBytes = MemoryMarshal.AsBytes(pixelRowSpan);
rgbBytes.CopyTo(buffer.Slice(offset));
offset += rgbBytes.Length;

2
src/ImageSharp/Formats/Tiff/Compression/Decompressors/RgbJpegSpectralConverter.cs

@ -28,6 +28,6 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Decompressors
}
/// <inheritdoc/>
public override JpegColorConverter GetColorConverter(JpegFrame frame, IRawJpegData jpegData) => JpegColorConverter.GetConverter(JpegColorSpace.RGB, frame.Precision);
protected override JpegColorConverter GetColorConverter(JpegFrame frame, IRawJpegData jpegData) => JpegColorConverter.GetConverter(JpegColorSpace.RGB, frame.Precision);
}
}

50
tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Quantize.cs

@ -0,0 +1,50 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
{
/// <summary>
/// Benchmarks <c>Block8x8F.Quantize</c> under the hardware intrinsics job set
/// defined by <see cref="Config.HwIntrinsics_SSE_AVX"/>.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Block8x8F_Quantize
{
    // Kept as instance fields (not locals) so they can be passed by ref to the method under test.
    private Block8x8F block = CreateFromScalar(1);
    private Block8x8F quant = CreateFromScalar(1);
    private Block8x8 result = default;

    /// <summary>
    /// Quantizes the source block with the quantization block into the result block.
    /// </summary>
    /// <returns>First element of the quantized block, returned so the benchmark produces an observable value.</returns>
    [Benchmark]
    public short Quantize()
    {
        Block8x8F.Quantize(ref this.block, ref this.result, ref this.quant);
        return this.result[0];
    }

    /// <summary>
    /// Creates a block with every element set to the same value.
    /// </summary>
    /// <param name="scalar">Value to assign to each element.</param>
    /// <returns>The filled block.</returns>
    private static Block8x8F CreateFromScalar(float scalar)
    {
        Block8x8F block = default;
        for (int i = 0; i < Block8x8F.Size; i++)
        {
            block[i] = scalar;
        }

        return block;
    }
}
}
/*
BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19042.1165 (20H2/October2020Update)
Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
.NET SDK=6.0.100-preview.3.21202.5
[Host] : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
1. No HwIntrinsics : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
2. SSE : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
3. AVX : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
| Method | Job | Mean | Error | StdDev | Ratio |
|--------- |-----------------|---------:|---------:|---------:|------:|
| Quantize | No HwIntrinsics | 73.34 ns | 1.081 ns | 1.011 ns | 1.00 |
| Quantize | SSE | 24.11 ns | 0.298 ns | 0.279 ns | 0.33 |
| Quantize | AVX | 15.90 ns | 0.074 ns | 0.065 ns | 0.22 |
*/

33
tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs

@ -9,29 +9,44 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Block8x8F_Transpose
{
private static readonly Block8x8F Source = Create8x8FloatData();
private Block8x8F source = Create8x8FloatData();
[Benchmark]
public void TransposeInto()
public float TransposeInplace()
{
var dest = default(Block8x8F);
Source.TransposeInto(ref dest);
this.source.TransposeInplace();
return this.source[0];
}
private static Block8x8F Create8x8FloatData()
{
var result = new float[64];
Block8x8F block = default;
for (int i = 0; i < 8; i++)
{
for (int j = 0; j < 8; j++)
{
result[(i * 8) + j] = (i * 10) + j;
block[(i * 8) + j] = (i * 10) + j;
}
}
var source = default(Block8x8F);
source.LoadFrom(result);
return source;
return block;
}
}
}
/*
BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19042.1237 (20H2/October2020Update)
Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
.NET SDK=6.0.100-preview.3.21202.5
[Host] : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
1. No HwIntrinsics : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
2. SSE : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
3. AVX : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
Runtime=.NET Core 3.1
| Method | Job | Mean | Error | StdDev | Ratio |
|----------------- |----------------:|----------:|----------:|----------:|------:|
| TransposeInplace | No HwIntrinsics | 12.531 ns | 0.0637 ns | 0.0565 ns | 1.00 |
| TransposeInplace | AVX | 5.767 ns | 0.0529 ns | 0.0495 ns | 0.46 |
*/

26
tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs

@ -111,24 +111,24 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
}
/*
BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19042
BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19042
Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
.NET Core SDK=6.0.100-preview.3.21202.5
[Host] : .NET Core 3.1.13 (CoreCLR 4.700.21.11102, CoreFX 4.700.21.11602), X64 RyuJIT
DefaultJob : .NET Core 3.1.13 (CoreCLR 4.700.21.11102, CoreFX 4.700.21.11602), X64 RyuJIT
.NET SDK=6.0.100-preview.3.21202.5
[Host] : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
DefaultJob : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
| Method | Quality | Mean | Error | StdDev | Ratio |
|---------------------------- |-------- |---------:|---------:|---------:|------:|
| 'System.Drawing Jpeg 4:2:0' | 75 | 29.41 ms | 0.108 ms | 0.096 ms | 1.00 |
| 'ImageSharp Jpeg 4:2:0' | 75 | 26.30 ms | 0.131 ms | 0.109 ms | 0.89 |
| 'ImageSharp Jpeg 4:4:4' | 75 | 36.70 ms | 0.303 ms | 0.269 ms | 1.25 |
| 'System.Drawing Jpeg 4:2:0' | 75 | 30.04 ms | 0.540 ms | 0.479 ms | 1.00 |
| 'ImageSharp Jpeg 4:2:0' | 75 | 19.32 ms | 0.290 ms | 0.257 ms | 0.64 |
| 'ImageSharp Jpeg 4:4:4' | 75 | 26.76 ms | 0.332 ms | 0.294 ms | 0.89 |
| | | | | | |
| 'System.Drawing Jpeg 4:2:0' | 90 | 32.67 ms | 0.226 ms | 0.211 ms | 1.00 |
| 'ImageSharp Jpeg 4:2:0' | 90 | 33.56 ms | 0.237 ms | 0.222 ms | 1.03 |
| 'ImageSharp Jpeg 4:4:4' | 90 | 44.82 ms | 0.250 ms | 0.234 ms | 1.37 |
| 'System.Drawing Jpeg 4:2:0' | 90 | 32.82 ms | 0.184 ms | 0.163 ms | 1.00 |
| 'ImageSharp Jpeg 4:2:0' | 90 | 25.00 ms | 0.408 ms | 0.361 ms | 0.76 |
| 'ImageSharp Jpeg 4:4:4' | 90 | 31.83 ms | 0.636 ms | 0.595 ms | 0.97 |
| | | | | | |
| 'System.Drawing Jpeg 4:2:0' | 100 | 39.06 ms | 0.233 ms | 0.218 ms | 1.00 |
| 'ImageSharp Jpeg 4:2:0' | 100 | 40.23 ms | 0.225 ms | 0.277 ms | 1.03 |
| 'ImageSharp Jpeg 4:4:4' | 100 | 63.35 ms | 0.486 ms | 0.431 ms | 1.62 |
| 'System.Drawing Jpeg 4:2:0' | 100 | 39.30 ms | 0.359 ms | 0.318 ms | 1.00 |
| 'ImageSharp Jpeg 4:2:0' | 100 | 34.49 ms | 0.265 ms | 0.235 ms | 0.88 |
| 'ImageSharp Jpeg 4:4:4' | 100 | 56.40 ms | 0.565 ms | 0.501 ms | 1.44 |
*/

10
tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs

@ -65,17 +65,17 @@ namespace SixLabors.ImageSharp.Benchmarks
.WithId("1. No HwIntrinsics").AsBaseline());
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx.IsSupported)
if (Sse.IsSupported)
{
this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31)
.WithId("2. AVX"));
.WithEnvironmentVariables(new EnvironmentVariable(EnableAVX, Off))
.WithId("2. SSE"));
}
if (Sse.IsSupported)
if (Avx.IsSupported)
{
this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31)
.WithEnvironmentVariables(new EnvironmentVariable(EnableAVX, Off))
.WithId("3. SSE"));
.WithId("3. AVX"));
}
#endif
}

11
tests/ImageSharp.Benchmarks/Program.cs

@ -1,8 +1,6 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Reflection;
using BenchmarkDotNet.Running;
namespace SixLabors.ImageSharp.Benchmarks
@ -15,9 +13,8 @@ namespace SixLabors.ImageSharp.Benchmarks
/// <param name="args">
/// The arguments to pass to the program.
/// </param>
public static void Main(string[] args)
{
new BenchmarkSwitcher(typeof(Program).GetTypeInfo().Assembly).Run(args);
}
public static void Main(string[] args) => BenchmarkSwitcher
.FromAssembly(typeof(Program).Assembly)
.Run(args);
}
}

3
tests/ImageSharp.Tests.ProfilingSandbox/Program.cs

@ -1,6 +1,3 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using SixLabors.ImageSharp.Tests.Formats.Jpg;
using SixLabors.ImageSharp.Tests.PixelFormats.PixelOperations;

135
tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

@ -4,7 +4,9 @@
// Uncomment this to turn unit tests into benchmarks:
// #define BENCHMARKING
using System;
using System.Diagnostics;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics.X86;
#endif
using SixLabors.ImageSharp.Formats.Jpeg.Components;
using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils;
@ -164,52 +166,27 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
}
[Fact]
public void TransposeInto()
public void TransposeInplace()
{
static void RunTest()
{
float[] expected = Create8x8FloatData();
ReferenceImplementations.Transpose8x8(expected);
var source = default(Block8x8F);
source.LoadFrom(Create8x8FloatData());
var block8x8 = default(Block8x8F);
block8x8.LoadFrom(Create8x8FloatData());
var dest = default(Block8x8F);
source.TransposeInto(ref dest);
block8x8.TransposeInplace();
float[] actual = new float[64];
dest.ScaledCopyTo(actual);
block8x8.ScaledCopyTo(actual);
Assert.Equal(expected, actual);
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX);
}
private class BufferHolder
{
public Block8x8F Buffer;
}
[Fact]
public void TransposeInto_Benchmark()
{
var source = new BufferHolder();
source.Buffer.LoadFrom(Create8x8FloatData());
var dest = new BufferHolder();
this.Output.WriteLine($"TransposeInto_PinningImpl_Benchmark X {Times} ...");
var sw = Stopwatch.StartNew();
for (int i = 0; i < Times; i++)
{
source.Buffer.TransposeInto(ref dest.Buffer);
}
sw.Stop();
this.Output.WriteLine($"TransposeInto_PinningImpl_Benchmark finished in {sw.ElapsedMilliseconds} ms");
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableHWIntrinsic);
}
private static float[] Create8x8ColorCropTestData()
@ -273,32 +250,44 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
}
[Theory]
[InlineData(1)]
[InlineData(2)]
public unsafe void Quantize(int seed)
[InlineData(1, 2)]
[InlineData(2, 1)]
public void Quantize(int srcSeed, int qtSeed)
{
var block = default(Block8x8F);
block.LoadFrom(Create8x8RoundedRandomFloatData(-2000, 2000, seed));
var qt = default(Block8x8F);
qt.LoadFrom(Create8x8RoundedRandomFloatData(-2000, 2000, seed));
static void RunTest(string srcSeedSerialized, string qtSeedSerialized)
{
int srcSeed = FeatureTestRunner.Deserialize<int>(srcSeedSerialized);
int qtSeed = FeatureTestRunner.Deserialize<int>(qtSeedSerialized);
var unzig = ZigZag.CreateUnzigTable();
Block8x8F source = CreateRandomFloatBlock(-2000, 2000, srcSeed);
int* expectedResults = stackalloc int[Block8x8F.Size];
ReferenceImplementations.QuantizeRational(&block, expectedResults, &qt, unzig.Data);
// Quantization code is used only in jpeg where it's guaranteed that
// quantization values are greater than 1
// Quantize method supports negative numbers, but very small numbers can cause trouble
Block8x8F quant = CreateRandomFloatBlock(1, 2000, qtSeed);
var actualResults = default(Block8x8F);
// Reference implementation quantizes given block via division
Block8x8 expected = default;
ReferenceImplementations.Quantize(ref source, ref expected, ref quant, ZigZag.ZigZagOrder);
Block8x8F.Quantize(ref block, ref actualResults, ref qt, ref unzig);
// Actual current implementation quantizes given block via multiplication
// With quantization table reciprocal
for (int i = 0; i < Block8x8F.Size; i++)
{
quant[i] = 1f / quant[i];
}
for (int i = 0; i < Block8x8F.Size; i++)
{
int expected = expectedResults[i];
int actual = (int)actualResults[i];
Block8x8 actual = default;
Block8x8F.Quantize(ref source, ref actual, ref quant);
Assert.Equal(expected, actual);
Assert.True(CompareBlocks(expected, actual, 1, out int diff), $"Blocks are not equal, diff={diff}");
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
srcSeed,
qtSeed,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE);
}
[Fact]
@ -368,48 +357,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX);
}
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public unsafe void DequantizeBlock(int seed)
{
Block8x8F original = CreateRandomFloatBlock(-500, 500, seed);
Block8x8F qt = CreateRandomFloatBlock(0, 10, seed + 42);
var unzig = ZigZag.CreateUnzigTable();
Block8x8F expected = original;
Block8x8F actual = original;
ReferenceImplementations.DequantizeBlock(&expected, &qt, unzig.Data);
Block8x8F.DequantizeBlock(&actual, &qt, unzig.Data);
this.CompareBlocks(expected, actual, 0);
}
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public unsafe void ZigZag_CreateDequantizationTable_MultiplicationShouldQuantize(int seed)
{
Block8x8F original = CreateRandomFloatBlock(-500, 500, seed);
Block8x8F qt = CreateRandomFloatBlock(0, 10, seed + 42);
var unzig = ZigZag.CreateUnzigTable();
Block8x8F zigQt = ZigZag.CreateDequantizationTable(ref qt);
Block8x8F expected = original;
Block8x8F actual = original;
ReferenceImplementations.DequantizeBlock(&expected, &qt, unzig.Data);
actual.MultiplyInPlace(ref zigQt);
this.CompareBlocks(expected, actual, 0);
}
[Fact]
public void AddToAllInPlace()
{
@ -462,7 +409,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
short[] data = Create8x8ShortData();
var source = new Block8x8(data);
var source = Block8x8.Load(data);
Block8x8F dest = default;
dest.LoadFromInt16Scalar(ref source);
@ -483,7 +430,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
short[] data = Create8x8ShortData();
var source = new Block8x8(data);
var source = Block8x8.Load(data);
Block8x8F dest = default;
dest.LoadFromInt16ExtendedAvx2(ref source);

195
tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs

@ -1,9 +1,10 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
using Xunit.Abstractions;
@ -22,7 +23,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{
short[] data = Create8x8ShortData();
var block = new Block8x8(data);
var block = Block8x8.Load(data);
for (int i = 0; i < Block8x8.Size; i++)
{
@ -43,32 +44,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Assert.Equal(42, block[42]);
}
[Fact]
public unsafe void Indexer_GetScalarAt_SetScalarAt()
{
int sum;
var block = default(Block8x8);
for (int i = 0; i < Block8x8.Size; i++)
{
Block8x8.SetScalarAt(&block, i, (short)i);
}
sum = 0;
for (int i = 0; i < Block8x8.Size; i++)
{
sum += Block8x8.GetScalarAt(&block, i);
}
Assert.Equal(sum, 64 * 63 / 2);
}
[Fact]
public void AsFloatBlock()
{
short[] data = Create8x8ShortData();
var source = new Block8x8(data);
var source = Block8x8.Load(data);
Block8x8F dest = source.AsFloatBlock();
@ -82,7 +63,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
public void ToArray()
{
short[] data = Create8x8ShortData();
var block = new Block8x8(data);
var block = Block8x8.Load(data);
short[] result = block.ToArray();
@ -93,8 +74,8 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
public void Equality_WhenTrue()
{
short[] data = Create8x8ShortData();
var block1 = new Block8x8(data);
var block2 = new Block8x8(data);
var block1 = Block8x8.Load(data);
var block2 = Block8x8.Load(data);
block1[0] = 42;
block2[0] = 42;
@ -107,8 +88,8 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
public void Equality_WhenFalse()
{
short[] data = Create8x8ShortData();
var block1 = new Block8x8(data);
var block2 = new Block8x8(data);
var block1 = Block8x8.Load(data);
var block2 = Block8x8.Load(data);
block1[0] = 42;
block2[0] = 666;
@ -131,8 +112,8 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
public void TotalDifference()
{
short[] data = Create8x8ShortData();
var block1 = new Block8x8(data);
var block2 = new Block8x8(data);
var block1 = Block8x8.Load(data);
var block2 = Block8x8.Load(data);
block2[10] += 7;
block2[63] += 8;
@ -141,5 +122,159 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Assert.Equal(15, d);
}
[Fact]
public void GetLastNonZeroIndex_AllZero()
{
    // A default-initialized block contains only zeros; the test expects -1 in that case.
    static void RunTest()
    {
        Block8x8 zeroBlock = default;
        nint expectedIndex = -1;

        nint reportedIndex = zeroBlock.GetLastNonZeroIndex();

        Assert.Equal(expectedIndex, reportedIndex);
    }

    // Executed once per requested feature set: everything allowed, then with AVX2 disabled.
    HwIntrinsics featureSets = HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2;
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTest, featureSets);
}
[Fact]
public void GetLastNonZeroIndex_AllNonZero()
{
    // Every element is set to a non-zero value, so the last element's index is expected.
    static void RunTest()
    {
        Block8x8 filledBlock = default;
        int position = 0;
        while (position < Block8x8.Size)
        {
            filledBlock[position] = 10;
            position++;
        }

        nint expectedIndex = Block8x8.Size - 1;
        nint reportedIndex = filledBlock.GetLastNonZeroIndex();

        Assert.Equal(expectedIndex, reportedIndex);
    }

    // Executed once per requested feature set: everything allowed, then with AVX2 disabled.
    HwIntrinsics featureSets = HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2;
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTest, featureSets);
}
[Theory]
[InlineData(1)]
[InlineData(2)]
public void GetLastNonZeroIndex_RandomFilledSingle(int seed)
{
    // Writes exactly one non-zero value at a random position of an otherwise zeroed block
    // and expects that position to be reported as the last non-zero index.
    static void RunTest(string seedSerialized)
    {
        int seed = FeatureTestRunner.Deserialize<int>(seedSerialized);
        var rng = new Random(seed);

        for (int i = 0; i < 1000; i++)
        {
            Block8x8 data = default;

            int setIndex = rng.Next(1, Block8x8.Size);

            // Random.Next(-2000, 2000) may return 0, which would leave the block all-zero
            // and invalidate the expectation below, so redraw until the value is non-zero.
            short value;
            do
            {
                value = (short)rng.Next(-2000, 2000);
            }
            while (value == 0);

            data[setIndex] = value;

            nint expected = setIndex;
            nint actual = data.GetLastNonZeroIndex();

            Assert.Equal(expected, actual);
        }
    }

    // Executed once per requested feature set: everything allowed, then with AVX2 disabled.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunTest,
        seed,
        HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
}
[Theory]
[InlineData(1)]
[InlineData(2)]
public void GetLastNonZeroIndex_RandomFilledPartially(int seed)
{
    // Fills the block from index 0 up to a random last index with one non-zero value
    // and expects that last index to be reported.
    static void RunTest(string seedSerialized)
    {
        int seed = FeatureTestRunner.Deserialize<int>(seedSerialized);
        var rng = new Random(seed);

        for (int i = 0; i < 1000; i++)
        {
            Block8x8 data = default;

            int lastIndex = rng.Next(1, Block8x8.Size);

            // Random.Next(-2000, 2000) may return 0; a zero fill would leave the block
            // all-zero and invalidate the expectation below, so redraw until non-zero.
            short fillValue;
            do
            {
                fillValue = (short)rng.Next(-2000, 2000);
            }
            while (fillValue == 0);

            for (int dataIndex = 0; dataIndex <= lastIndex; dataIndex++)
            {
                data[dataIndex] = fillValue;
            }

            // Typed as nint to match the return type and the sibling tests.
            nint expected = lastIndex;
            nint actual = data.GetLastNonZeroIndex();

            Assert.Equal(expected, actual);
        }
    }

    // Executed once per requested feature set: everything allowed, then with AVX2 disabled.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunTest,
        seed,
        HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
}
[Theory]
[InlineData(1)]
[InlineData(2)]
public void GetLastNonZeroIndex_RandomFilledFragmented(int seed)
{
    // Fills two random index ranges (possibly separated by zeros) with one non-zero value
    // and expects the end of the second range to be reported.
    static void RunTest(string seedSerialized)
    {
        int seed = FeatureTestRunner.Deserialize<int>(seedSerialized);
        var rng = new Random(seed);

        for (int i = 0; i < 1000; i++)
        {
            Block8x8 data = default;

            // Random.Next(-2000, 2000) may return 0; a zero fill would leave the block
            // all-zero and invalidate the expectation below, so redraw until non-zero.
            short fillValue;
            do
            {
                fillValue = (short)rng.Next(-2000, 2000);
            }
            while (fillValue == 0);

            // first filled chunk
            int firstChunkStart = rng.Next(0, Block8x8.Size / 2);
            int firstChunkEnd = rng.Next(firstChunkStart, Block8x8.Size / 2);
            for (int dataIdx = firstChunkStart; dataIdx <= firstChunkEnd; dataIdx++)
            {
                data[dataIdx] = fillValue;
            }

            // second filled chunk, there might be a spot with zero(s) between first and second chunk
            int secondChunkStart = rng.Next(firstChunkEnd, Block8x8.Size);
            int secondChunkEnd = rng.Next(secondChunkStart, Block8x8.Size);
            for (int dataIdx = secondChunkStart; dataIdx <= secondChunkEnd; dataIdx++)
            {
                data[dataIdx] = fillValue;
            }

            int expected = secondChunkEnd;
            nint actual = data.GetLastNonZeroIndex();

            Assert.True(expected == actual, $"Expected: {expected}\nActual: {actual}\nInput matrix: {data}");
        }
    }

    // Executed once per requested feature set: everything allowed, then with AVX2 disabled.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunTest,
        seed,
        HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
}
}
}

163
tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs

@ -33,15 +33,14 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{
float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
var source = Block8x8F.Load(sourceArray);
var srcBlock = Block8x8F.Load(sourceArray);
Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref source);
Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref srcBlock);
var temp = default(Block8x8F);
var actual = default(Block8x8F);
FastFloatingPointDCT.TransformIDCT(ref source, ref actual, ref temp);
FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp);
this.CompareBlocks(expected, actual, 1f);
this.CompareBlocks(expected, srcBlock, 1f);
}
[Theory]
@ -52,15 +51,14 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{
float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
var source = Block8x8F.Load(sourceArray);
var srcBlock = Block8x8F.Load(sourceArray);
Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref source);
Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock);
var temp = default(Block8x8F);
var actual = default(Block8x8F);
FastFloatingPointDCT.TransformIDCT(ref source, ref actual, ref temp);
FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp);
this.CompareBlocks(expected, actual, 1f);
this.CompareBlocks(expected, srcBlock, 1f);
}
// Inverse transform
@ -120,24 +118,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
public void IDCT8x8_Avx(int seed)
{
#if SUPPORTS_RUNTIME_INTRINSICS
var skip = !Avx.IsSupported;
#else
var skip = true;
#endif
if (skip)
if (!Avx.IsSupported)
{
this.Output.WriteLine("No AVX present, skipping test!");
return;
}
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
var srcBlock = default(Block8x8F);
Block8x8F srcBlock = default;
srcBlock.LoadFrom(src);
var destBlock = default(Block8x8F);
Block8x8F destBlock = default;
var expectedDest = new float[64];
float[] expectedDest = new float[64];
// reference, left part
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest);
@ -148,10 +140,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
// testee, whole 8x8
FastFloatingPointDCT.IDCT8x8_Avx(ref srcBlock, ref destBlock);
var actualDest = new float[64];
float[] actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
#endif
}
[Theory]
@ -167,8 +160,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
var destBlock = default(Block8x8F);
var expectedDest = new float[64];
var temp1 = new float[64];
var temp2 = default(Block8x8F);
@ -177,10 +168,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp1);
// testee
FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref destBlock, ref temp2);
FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp2);
var actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
srcBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
@ -198,95 +189,8 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
}
// Forward transform
[Theory]
[InlineData(1)]
[InlineData(2)]
public void FDCT8x4_LeftPart(int seed)
{
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
var destBlock = default(Block8x8F);
var expectedDest = new float[64];
// reference
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D8x4_32f(src, expectedDest);
// testee
FastFloatingPointDCT.FDCT8x4_LeftPart(ref srcBlock, ref destBlock);
var actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
[Theory]
[InlineData(1)]
[InlineData(2)]
public void FDCT8x4_RightPart(int seed)
{
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
var destBlock = default(Block8x8F);
var expectedDest = new float[64];
// reference
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
// testee
FastFloatingPointDCT.FDCT8x4_RightPart(ref srcBlock, ref destBlock);
var actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
[Theory]
[InlineData(1)]
[InlineData(2)]
public void FDCT8x8_Avx(int seed)
{
#if SUPPORTS_RUNTIME_INTRINSICS
var skip = !Avx.IsSupported;
#else
var skip = true;
#endif
if (skip)
{
this.Output.WriteLine("No AVX present, skipping test!");
return;
}
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
var destBlock = default(Block8x8F);
var expectedDest = new float[64];
// reference, left part
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D8x4_32f(src, expectedDest);
// reference, right part
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
// testee, whole 8x8
FastFloatingPointDCT.FDCT8x8_Avx(ref srcBlock, ref destBlock);
var actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
// This test covers entire FDCT conversions chain
// This test checks all implementations: intrinsic and scalar fallback
[Theory]
[InlineData(1)]
[InlineData(2)]
@ -297,37 +201,38 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
int seed = FeatureTestRunner.Deserialize<int>(serialized);
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
var destBlock = default(Block8x8F);
var block = default(Block8x8F);
block.LoadFrom(src);
var expectedDest = new float[64];
var temp1 = new float[64];
var temp2 = default(Block8x8F);
float[] expectedDest = new float[64];
float[] temp1 = new float[64];
// reference
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true);
// testee
FastFloatingPointDCT.TransformFDCT(ref srcBlock, ref destBlock, ref temp2, false);
// Part of the FDCT calculations is fused into the quantization step
// We must multiply the transformed block by the reciprocal values from FastFloatingPointDCT.DctReciprocalAdjustmentCoefficients
FastFloatingPointDCT.TransformFDCT(ref block);
for (int i = 0; i < 64; i++)
{
block[i] = block[i] * FastFloatingPointDCT.DctReciprocalAdjustmentCoefficients[i];
}
var actualDest = new float[64];
destBlock.ScaledCopyTo(actualDest);
float[] actualDest = block.ToArray();
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
Assert.Equal(expectedDest, actualDest, new ApproximateFloatComparer(1f));
}
// 4 paths:
// 1. AllowAll - call avx/fma implementation
// 2. DisableFMA - call avx implementation without fma acceleration
// 3. DisableAvx - call fallback code of Vector4 implementation
//
// DisableSSE isn't needed because fallback Vector4 code will compile to either sse or fallback code with same result
// 3. DisableAvx - call sse implementation
// 4. DisableHWIntrinsic - call scalar fallback implementation
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
seed,
HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX);
HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX | HwIntrinsics.DisableHWIntrinsic);
}
}
}

152
tests/ImageSharp.Tests/Formats/Jpg/HuffmanScanEncoderTests.cs

@ -85,157 +85,5 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Assert.Equal(expected, actual);
}
}
[Fact]
public void GetLastValuableElementIndex_AllZero()
{
    // For a default (all-zero) block the test only requires the result to be below 1.
    static void RunTest()
    {
        Block8x8F zeroBlock = default;
        int upperBoundExclusive = 1;

        int reportedIndex = HuffmanScanEncoder.GetLastValuableElementIndex(ref zeroBlock);

        bool isBelowBound = reportedIndex < upperBoundExclusive;
        Assert.True(isBelowBound);
    }

    // Executed once per requested feature set: everything allowed, then with AVX2 disabled.
    HwIntrinsics featureSets = HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2;
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTest, featureSets);
}
[Fact]
public void GetLastValuableElementIndex_AllNonZero()
{
    // Every element is set to a non-zero value, so the last element's index is expected.
    static void RunTest()
    {
        Block8x8F filledBlock = default;
        int position = 0;
        while (position < Block8x8F.Size)
        {
            filledBlock[position] = 10;
            position++;
        }

        int expectedIndex = Block8x8F.Size - 1;
        int reportedIndex = HuffmanScanEncoder.GetLastValuableElementIndex(ref filledBlock);

        Assert.Equal(expectedIndex, reportedIndex);
    }

    // Executed once per requested feature set: everything allowed, then with AVX2 disabled.
    HwIntrinsics featureSets = HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2;
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTest, featureSets);
}
[Theory]
[InlineData(1)]
[InlineData(2)]
public void GetLastValuableElementIndex_RandomFilledSingle(int seed)
{
    // Sets one randomly chosen element of an otherwise zeroed block and expects
    // that element's index to be reported.
    static void RunTest(string seedSerialized)
    {
        var random = new Random(FeatureTestRunner.Deserialize<int>(seedSerialized));

        int remaining = 1000;
        while (remaining-- > 0)
        {
            Block8x8F block = default;

            int targetIndex = random.Next(1, Block8x8F.Size);
            block[targetIndex] = random.Next();

            int reportedIndex = HuffmanScanEncoder.GetLastValuableElementIndex(ref block);

            Assert.Equal(targetIndex, reportedIndex);
        }
    }

    // Executed once per requested feature set: everything allowed, then with AVX2 disabled.
    HwIntrinsics featureSets = HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2;
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTest, seed, featureSets);
}
[Theory]
[InlineData(1)]
[InlineData(2)]
public void GetLastValuableElementIndex_RandomFilledPartially(int seed)
{
    // Fills the block from index 0 up to a random last index with one random value
    // and expects that last index to be reported.
    static void RunTest(string seedSerialized)
    {
        var random = new Random(FeatureTestRunner.Deserialize<int>(seedSerialized));

        int remaining = 1000;
        while (remaining-- > 0)
        {
            Block8x8F block = default;

            int filledUpTo = random.Next(1, Block8x8F.Size);
            int fill = random.Next();

            int cursor = 0;
            while (cursor <= filledUpTo)
            {
                block[cursor] = fill;
                cursor++;
            }

            int reportedIndex = HuffmanScanEncoder.GetLastValuableElementIndex(ref block);

            Assert.Equal(filledUpTo, reportedIndex);
        }
    }

    // Executed once per requested feature set: everything allowed, then with AVX2 disabled.
    HwIntrinsics featureSets = HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2;
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTest, seed, featureSets);
}
[Theory]
[InlineData(1)]
[InlineData(2)]
public void GetLastValuableElementIndex_RandomFilledFragmented(int seed)
{
    // Fills two separate index ranges with one non-zero value, possibly leaving zeros
    // between them, and expects the end of the second range to be reported.
    static void RunTest(string seedSerialized)
    {
        int seed = FeatureTestRunner.Deserialize<int>(seedSerialized);
        var rng = new Random(seed);

        for (int i = 0; i < 1000; i++)
        {
            Block8x8F data = default;

            // Lower bound of 1 guarantees a non-zero fill value (Random.Next() may return 0).
            int fillValue = rng.Next(1, int.MaxValue);

            // first filled chunk
            int lastIndex1 = rng.Next(1, Block8x8F.Size / 2);
            for (int dataIndex = 0; dataIndex <= lastIndex1; dataIndex++)
            {
                data[dataIndex] = fillValue;
            }

            // second filled chunk, there might be a spot with zero(s) between first and second chunk.
            // It starts after the first chunk instead of at index 0; refilling from 0 would
            // always produce one contiguous run and never exercise the fragmented case.
            int lastIndex2 = rng.Next(lastIndex1 + 1, Block8x8F.Size);
            int firstIndex2 = rng.Next(lastIndex1 + 1, lastIndex2 + 1);
            for (int dataIndex = firstIndex2; dataIndex <= lastIndex2; dataIndex++)
            {
                data[dataIndex] = fillValue;
            }

            int expected = lastIndex2;
            int actual = HuffmanScanEncoder.GetLastValuableElementIndex(ref data);

            Assert.Equal(expected, actual);
        }
    }

    // Executed once per requested feature set: everything allowed, then with AVX2 disabled.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunTest,
        seed,
        HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
}
}
}

8
tests/ImageSharp.Tests/Formats/Jpg/QuantizationTests.cs

@ -21,7 +21,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Block8x8F table = JpegQuantization.ScaleLuminanceTable(quality);
int estimatedQuality = JpegQuantization.EstimateLuminanceQuality(ref table);
Assert.True(quality.Equals(estimatedQuality), $"Failed to estimate luminance quality for standard table at quality level {quality}");
Assert.True(
quality.Equals(estimatedQuality),
$"Failed to estimate luminance quality for standard table at quality level {quality}");
}
}
@ -35,7 +37,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
Block8x8F table = JpegQuantization.ScaleChrominanceTable(quality);
int estimatedQuality = JpegQuantization.EstimateChrominanceQuality(ref table);
Assert.True(quality.Equals(estimatedQuality), $"Failed to estimate chrominance quality for standard table at quality level {quality}");
Assert.True(
quality.Equals(estimatedQuality),
$"Failed to estimate chrominance quality for standard table at quality level {quality}");
}
}
}

32
tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs

@ -190,6 +190,38 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
Assert.False(failed);
}
/// <summary>
/// Compares two 16-bit blocks by converting both to float blocks and delegating to the
/// float overload with the tolerance widened by 1e-5.
/// </summary>
/// <param name="a">The first block.</param>
/// <param name="b">The second block.</param>
/// <param name="tolerance">The tolerance handed to the float comparison.</param>
/// <param name="diff">The float overload's difference value, truncated to an integer.</param>
/// <returns>The result of the float comparison.</returns>
internal static bool CompareBlocks(Block8x8 a, Block8x8 b, int tolerance, out int diff)
{
    Block8x8F floatA = a.AsFloatBlock();
    Block8x8F floatB = b.AsFloatBlock();

    // NOTE(review): the 1e-5 slack presumably guards against float rounding at the tolerance boundary - confirm.
    bool withinTolerance = CompareBlocks(floatA, floatB, tolerance + 1e-5f, out float floatDiff);

    diff = (int)floatDiff;
    return withinTolerance;
}
/// <summary>
/// Compares two float blocks by copying each into an array and delegating to the span overload.
/// </summary>
/// <param name="a">The first block.</param>
/// <param name="b">The second block.</param>
/// <param name="tolerance">The tolerance handed to the span comparison.</param>
/// <param name="diff">The difference value produced by the span comparison.</param>
/// <returns>The result of the span comparison.</returns>
internal static bool CompareBlocks(Block8x8F a, Block8x8F b, float tolerance, out float diff)
{
    float[] valuesA = a.ToArray();
    float[] valuesB = b.ToArray();
    return CompareBlocks(valuesA, valuesB, tolerance, out diff);
}
/// <summary>
/// Compares the first 64 elements of two spans pairwise using an
/// <see cref="ApproximateFloatComparer"/> built from <paramref name="tolerance"/>.
/// </summary>
/// <param name="a">The first span, read as the expected values.</param>
/// <param name="b">The second span, read as the actual values.</param>
/// <param name="tolerance">The tolerance passed to the comparer.</param>
/// <param name="diff">The sum of absolute differences over all 64 pairs.</param>
/// <returns><see langword="true"/> when the comparer accepted every pair.</returns>
internal static bool CompareBlocks(Span<float> a, Span<float> b, float tolerance, out float diff)
{
    var comparer = new ApproximateFloatComparer(tolerance);
    bool everyPairAccepted = true;
    diff = 0;

    int idx = 0;
    while (idx < 64)
    {
        float left = a[idx];
        float right = b[idx];

        // The difference is accumulated for every pair, including accepted ones.
        diff += Math.Abs(left - right);
        everyPairAccepted &= comparer.Equals(left, right);

        idx++;
    }

    return everyPairAccepted;
}
internal static JpegDecoderCore ParseJpegStream(string testFileName, bool metaDataOnly = false)
{
byte[] bytes = TestFile.Create(testFileName).Bytes;

2
tests/ImageSharp.Tests/Formats/Jpg/Utils/LibJpegTools.ComponentData.cs

@ -53,7 +53,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
{
this.MinVal = Math.Min(this.MinVal, data.Min());
this.MaxVal = Math.Max(this.MaxVal, data.Max());
this.SpectralBlocks[x, y] = new Block8x8(data);
this.SpectralBlocks[x, y] = Block8x8.Load(data);
}
public void LoadSpectralStride(Buffer2D<Block8x8> data, int strideIndex)

54
tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs

@ -15,18 +15,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
/// </summary>
internal static partial class ReferenceImplementations
{
public static unsafe void DequantizeBlock(Block8x8F* blockPtr, Block8x8F* qtPtr, byte* unzigPtr)
public static void DequantizeBlock(ref Block8x8F block, ref Block8x8F qt, ReadOnlySpan<byte> zigzag)
{
float* b = (float*)blockPtr;
float* qtp = (float*)qtPtr;
for (int qtIndex = 0; qtIndex < Block8x8F.Size; qtIndex++)
for (int i = 0; i < Block8x8F.Size; i++)
{
byte i = unzigPtr[qtIndex];
float* unzigPos = b + i;
float val = *unzigPos;
val *= qtp[qtIndex];
*unzigPos = val;
int zig = zigzag[i];
block[zig] *= qt[i];
}
}
@ -101,42 +95,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils
/// <summary>
/// Reference implementation to test <see cref="Block8x8F.Quantize"/>.
/// Rounding is done using an integer-based algorithm defined in <see cref="RationalRound(int,int)"/>.
/// </summary>
/// <param name="src">The input block</param>
/// <param name="dest">The destination block of integers</param>
/// <param name="qt">The quantization table</param>
/// <param name="unzigPtr">Pointer to <see cref="ZigZag.Data"/> </param>
public static unsafe void QuantizeRational(Block8x8F* src, int* dest, Block8x8F* qt, byte* unzigPtr)
/// <param name="src">The input block.</param>
/// <param name="dest">The destination block of 16bit integers.</param>
/// <param name="qt">The quantization table.</param>
/// <param name="zigzag">Zig-Zag index sequence span.</param>
public static void Quantize(ref Block8x8F src, ref Block8x8 dest, ref Block8x8F qt, ReadOnlySpan<byte> zigzag)
{
float* s = (float*)src;
float* q = (float*)qt;
for (int zig = 0; zig < Block8x8F.Size; zig++)
for (int i = 0; i < Block8x8F.Size; i++)
{
int a = (int)s[unzigPtr[zig]];
int b = (int)q[zig];
int val = RationalRound(a, b);
dest[zig] = val;
int zig = zigzag[i];
dest[i] = (short)Math.Round(src[zig] / qt[zig], MidpointRounding.AwayFromZero);
}
}
/// <summary>
/// Rounds a rational number defined as dividend/divisor into an integer.
/// </summary>
/// <param name="dividend">The dividend.</param>
/// <param name="divisor">The divisor.</param>
/// <returns>The rounded value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int RationalRound(int dividend, int divisor)
{
if (dividend >= 0)
{
return (dividend + (divisor >> 1)) / divisor;
}
return -((-dividend + (divisor >> 1)) / divisor);
}
}
}

5
tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs

@ -13,8 +13,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
public void ZigZagCanHandleAllPossibleCoefficients()
{
// Mimic the behaviour of the huffman scan decoder using all possible byte values
var block = new short[64];
var zigzag = ZigZag.CreateUnzigTable();
short[] block = new short[64];
for (int h = 0; h < 255; h++)
{
@ -27,7 +26,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
if (s != 0)
{
i += r;
block[zigzag[i++]] = (short)s;
block[ZigZag.ZigZagOrder[i++]] = (short)s;
}
else
{

46
tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs

@ -301,6 +301,52 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities
}
}
/// <summary>
/// Runs the given test <paramref name="action"/> once for every entry produced from
/// <paramref name="intrinsics"/>. The <see cref="HwIntrinsics.AllowAll"/> entry runs in the
/// current process; every other entry runs in a remote process started with the matching
/// <c>COMPlus_*</c> environment variable set to "0". Nothing runs when remote execution is unsupported.
/// </summary>
/// <param name="action">The test action to run.</param>
/// <param name="arg0">The value to pass as a parameter #0 to the test action.</param>
/// <param name="arg1">The value to pass as a parameter #1 to the test action.</param>
/// <param name="intrinsics">The intrinsics features.</param>
public static void RunWithHwIntrinsicsFeature<T>(
    Action<string, string> action,
    T arg0,
    T arg1,
    HwIntrinsics intrinsics)
    where T : IConvertible
{
    if (!RemoteExecutor.IsSupported)
    {
        return;
    }

    foreach (KeyValuePair<HwIntrinsics, string> feature in intrinsics.ToFeatureKeyValueCollection())
    {
        if (feature.Key == HwIntrinsics.AllowAll)
        {
            // Since we are running using the default architecture there is no
            // point creating the overhead of running the action in a separate process.
            action(arg0.ToString(), arg1.ToString());
            continue;
        }

        // Setting the switch to "0" in the child process environment turns the feature off there.
        var startInfo = new ProcessStartInfo();
        startInfo.Environment[$"COMPlus_{feature.Value}"] = "0";

        var invokeOptions = new RemoteInvokeOptions
        {
            StartInfo = startInfo
        };

        RemoteExecutor.Invoke(action, arg0.ToString(), arg1.ToString(), invokeOptions).Dispose();
    }
}
internal static Dictionary<HwIntrinsics, string> ToFeatureKeyValueCollection(this HwIntrinsics intrinsics)
{
// Loop through and translate the given values into COMPlus equivalents

Loading…
Cancel
Save