Browse Source

NormalizeColorsAndRoundAvx2() + JpegBlockPostProcessor cleanup

af/merge-core
Anton Firszov 9 years ago
parent
commit
57ccde4521
  1. 60
      src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs
  2. 129
      src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs
  3. 4
      src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs
  4. 3
      src/ImageSharp/Formats/Jpeg/Common/FastFloatingPointDCT.cs
  5. 26
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

60
src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs

@ -529,34 +529,39 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common
return result;
}
public void RoundInplace()
/// <summary>
/// Rewrites all eight rows of this block in place with the result of
/// <see cref="NormalizeAndRound"/>, using an offset of 128 and an upper bound of 255.
/// </summary>
/// <remarks>
/// Every row is reached by reinterpreting the reference to one of the V0L..V7L fields
/// (typed as <see cref="Vector4"/>) as a reference to a <see cref="Vector{T}"/> of floats.
/// NOTE(review): this presumably covers a whole 8-float row only when Vector&lt;float&gt;.Count == 8;
/// confirm that every caller checks for AVX2 support before calling this method.
/// </remarks>
public void NormalizeColorsAndRoundInplaceAvx2()
{
    var offset = new Vector<float>(128F);
    var upperBound = new Vector<float>(255F);

    // Take a vector-wide view over each row first ...
    ref Vector<float> r0 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V0L);
    ref Vector<float> r1 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V1L);
    ref Vector<float> r2 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V2L);
    ref Vector<float> r3 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V3L);
    ref Vector<float> r4 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V4L);
    ref Vector<float> r5 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V5L);
    ref Vector<float> r6 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V6L);
    ref Vector<float> r7 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V7L);

    // ... then overwrite the rows one by one, top to bottom.
    r0 = NormalizeAndRound(r0, offset, upperBound);
    r1 = NormalizeAndRound(r1, offset, upperBound);
    r2 = NormalizeAndRound(r2, offset, upperBound);
    r3 = NormalizeAndRound(r3, offset, upperBound);
    r4 = NormalizeAndRound(r4, offset, upperBound);
    r5 = NormalizeAndRound(r5, offset, upperBound);
    r6 = NormalizeAndRound(r6, offset, upperBound);
    r7 = NormalizeAndRound(r7, offset, upperBound);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> NormalizeAndRound(Vector<float> row, Vector<float> off, Vector<float> max)
{
if (Vector<float>.Count == 8 && Vector<int>.Count == 8)
{
ref Vector<float> row0 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V0L);
row0 = row0.FastRound();
ref Vector<float> row1 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V1L);
row1 = row1.FastRound();
ref Vector<float> row2 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V2L);
row2 = row2.FastRound();
ref Vector<float> row3 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V3L);
row3 = row3.FastRound();
ref Vector<float> row4 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V4L);
row4 = row4.FastRound();
ref Vector<float> row5 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V5L);
row5 = row5.FastRound();
ref Vector<float> row6 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V6L);
row6 = row6.FastRound();
ref Vector<float> row7 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V7L);
row7 = row7.FastRound();
}
else
{
this.RoundInplaceSlow();
}
row += off;
row = Vector.Max(row, Vector<float>.Zero);
row = Vector.Min(row, max);
return row.FastRound();
}
internal void RoundInplaceSlow()
public void RoundInplace()
{
for (int i = 0; i < Size; i++)
{
@ -598,10 +603,5 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common
DebugGuard.MustBeLessThan(idx, Size, nameof(idx));
DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx));
}
[StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(float))]
private struct Row
{
}
}
}

129
src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs

@ -14,14 +14,24 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder
internal unsafe struct JpegBlockPostProcessor
{
/// <summary>
/// The <see cref="ComputationData"/>
/// Source block
/// </summary>
private ComputationData data;
public Block8x8F SourceBlock;
/// <summary>
/// Pointers to elements of <see cref="data"/>
/// Temporary block 1 to store intermediate and/or final computation results
/// </summary>
private DataPointers pointers;
public Block8x8F WorkspaceBlock1;
/// <summary>
/// Temporary block 2 to store intermediate and/or final computation results
/// </summary>
public Block8x8F WorkspaceBlock2;
/// <summary>
/// The quantization table as <see cref="Block8x8F"/>
/// </summary>
public Block8x8F DequantiazationTable;
private Size subSamplingDivisors;
@ -30,11 +40,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder
/// </summary>
public static void Init(JpegBlockPostProcessor* postProcessor, IRawJpegData decoder, IJpegComponent component)
{
postProcessor->data = ComputationData.Create();
postProcessor->pointers = new DataPointers(&postProcessor->data);
int qtIndex = component.QuantizationTableIndex;
postProcessor->data.DequantiazationTable = ZigZag.CreateDequantizationTable(ref decoder.QuantizationTables[qtIndex]);
postProcessor->DequantiazationTable = ZigZag.CreateDequantizationTable(ref decoder.QuantizationTables[qtIndex]);
postProcessor->subSamplingDivisors = component.SubSamplingDivisors;
}
@ -42,110 +49,28 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder
ref Block8x8 sourceBlock,
BufferArea<float> destArea)
{
sourceBlock.CopyToFloatBlock(ref this.data.SourceBlock);
Block8x8F* b = this.pointers.SourceBlock;
ref Block8x8F b = ref this.SourceBlock;
sourceBlock.CopyToFloatBlock(ref b);
// Dequantize:
b->MultiplyInplace(ref this.data.DequantiazationTable);
b.MultiplyInplace(ref this.DequantiazationTable);
FastFloatingPointDCT.TransformIDCT(ref *b, ref this.data.WorkspaceBlock1, ref this.data.WorkspaceBlock2);
this.data.WorkspaceBlock1.NormalizeColorsInplace();
FastFloatingPointDCT.TransformIDCT(ref b, ref this.WorkspaceBlock1, ref this.WorkspaceBlock2);
// To conform better to libjpeg we actually NEED TO lose precision here.
// This is because they store blocks as Int16 between all the operations.
// Unfortunately, we need to emulate this to be "more accurate" :(
this.data.WorkspaceBlock1.RoundInplace();
this.data.WorkspaceBlock1.CopyTo(destArea, this.subSamplingDivisors.Width, this.subSamplingDivisors.Height);
}
/// <summary>
/// Holds the "large" data blocks needed for computations.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct ComputationData
{
/// <summary>
/// Source block
/// </summary>
public Block8x8F SourceBlock;
/// <summary>
/// Temporal block 1 to store intermediate and/or final computation results
/// </summary>
public Block8x8F WorkspaceBlock1;
/// <summary>
/// Temporal block 2 to store intermediate and/or final computation results
/// </summary>
public Block8x8F WorkspaceBlock2;
/// <summary>
/// The quantization table as <see cref="Block8x8F"/>
/// </summary>
public Block8x8F DequantiazationTable;
/// <summary>
/// The jpeg unzig data
/// </summary>
public ZigZag Unzig;
/// <summary>
/// Creates and initializes a new <see cref="ComputationData"/> instance
/// </summary>
/// <returns>The <see cref="ComputationData"/></returns>
public static ComputationData Create()
// To be "more accurate", we need to emulate this by rounding!
if (SimdUtils.IsAvx2CompatibleArchitecture)
{
var data = default(ComputationData);
data.Unzig = ZigZag.CreateUnzigTable();
return data;
this.WorkspaceBlock1.NormalizeColorsAndRoundInplaceAvx2();
}
}
/// <summary>
/// Contains pointers to the memory regions of <see cref="ComputationData"/> so they can be easily passed around to pointer based utility methods of <see cref="Block8x8F"/>
/// </summary>
public struct DataPointers
{
/// <summary>
/// Pointer to <see cref="ComputationData.SourceBlock"/>
/// </summary>
public Block8x8F* SourceBlock;
/// <summary>
/// Pointer to <see cref="ComputationData.WorkspaceBlock1"/>
/// </summary>
public Block8x8F* WorkspaceBlock1;
/// <summary>
/// Pointer to <see cref="ComputationData.WorkspaceBlock2"/>
/// </summary>
public Block8x8F* WorkspaceBlock2;
/// <summary>
/// Pointer to <see cref="ComputationData.DequantiazationTable"/>
/// </summary>
public Block8x8F* DequantiazationTable;
/// <summary>
/// Pointer to <see cref="ComputationData.Unzig"/> as int*
/// </summary>
public int* Unzig;
/// <summary>
/// Initializes a new instance of the <see cref="DataPointers" /> struct.
/// </summary>
/// <param name="dataPtr">Pointer to <see cref="ComputationData"/></param>
internal DataPointers(ComputationData* dataPtr)
else
{
this.SourceBlock = &dataPtr->SourceBlock;
this.WorkspaceBlock1 = &dataPtr->WorkspaceBlock1;
this.WorkspaceBlock2 = &dataPtr->WorkspaceBlock2;
this.DequantiazationTable = &dataPtr->DequantiazationTable;
this.Unzig = dataPtr->Unzig.Data;
this.WorkspaceBlock1.NormalizeColorsInplace();
this.WorkspaceBlock1.RoundInplace();
}
this.WorkspaceBlock1.CopyTo(destArea, this.subSamplingDivisors.Width, this.subSamplingDivisors.Height);
}
}
}

4
src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs

@ -123,12 +123,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder
if (Vector<float>.Count == 4)
{
// TODO: Find a way to properly run & test this path on modern AVX2 PC-s! (Have I already mentioned that Vector<T> is terrible?)
// TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector<T> is terrible?)
r.RoundAndDownscaleBasic();
g.RoundAndDownscaleBasic();
b.RoundAndDownscaleBasic();
}
else if (Vector<float>.Count == 8)
else if (SimdUtils.IsAvx2CompatibleArchitecture)
{
r.RoundAndDownscaleAvx2();
g.RoundAndDownscaleAvx2();

3
src/ImageSharp/Formats/Jpeg/Common/FastFloatingPointDCT.cs

@ -50,7 +50,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common
/// <param name="temp">Temporary block provided by the caller</param>
public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp)
{
// TODO: Transpose is a bottleneck now. We need full AVX support to optimize it:
// https://github.com/dotnet/corefx/issues/22940
src.TransposeInto(ref temp);
IDCT8x4_LeftPart(ref temp, ref dest);
IDCT8x4_RightPart(ref temp, ref dest);

26
tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

@ -297,6 +297,30 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
}
}
/// <summary>
/// Checks that <c>NormalizeColorsAndRoundInplaceAvx2()</c> yields exactly the same block as
/// calling <c>NormalizeColorsInplace()</c> followed by <c>RoundInplace()</c>.
/// Only logs a message and returns when AVX2 is reported as unavailable.
/// </summary>
/// <param name="seed">Seed passed to the random block generator.</param>
[Theory]
[InlineData(1)]
[InlineData(2)]
public void NormalizeColorsAndRoundAvx2(int seed)
{
    if (SimdUtils.IsAvx2CompatibleArchitecture)
    {
        Block8x8F input = CreateRandomFloatBlock(-200, 200, seed);

        // Reference result: the two separate steps.
        Block8x8F reference = input;
        reference.NormalizeColorsInplace();
        reference.RoundInplace();

        // Result under test: the combined method.
        Block8x8F combined = input;
        combined.NormalizeColorsAndRoundInplaceAvx2();

        this.Output.WriteLine(reference.ToString());
        this.Output.WriteLine(combined.ToString());

        // A tolerance of 0 is passed, so the blocks are expected to match exactly.
        this.CompareBlocks(reference, combined, 0);
    }
    else
    {
        this.Output.WriteLine("AVX2 not supported, skipping!");
    }
}
[Theory]
[InlineData(1)]
@ -352,7 +376,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public void RoundInplace(int seed)
public void RoundInplaceSlow(int seed)
{
Block8x8F s = CreateRandomFloatBlock(-500, 500, seed);

Loading…
Cancel
Save