Browse Source

NormalizeColorsAndRoundAvx2() + JpegBlockPostProcessor cleanup

af/merge-core
Anton Firszov 9 years ago
parent
commit
57ccde4521
  1. 60
      src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs
  2. 129
      src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs
  3. 4
      src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs
  4. 3
      src/ImageSharp/Formats/Jpeg/Common/FastFloatingPointDCT.cs
  5. 26
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

60
src/ImageSharp/Formats/Jpeg/Common/Block8x8F.cs

@ -529,34 +529,39 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common
return result; return result;
} }
public void RoundInplace() public void NormalizeColorsAndRoundInplaceAvx2()
{
Vector<float> off = new Vector<float>(128f);
Vector<float> max = new Vector<float>(255F);
ref Vector<float> row0 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V0L);
row0 = NormalizeAndRound(row0, off, max);
ref Vector<float> row1 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V1L);
row1 = NormalizeAndRound(row1, off, max);
ref Vector<float> row2 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V2L);
row2 = NormalizeAndRound(row2, off, max);
ref Vector<float> row3 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V3L);
row3 = NormalizeAndRound(row3, off, max);
ref Vector<float> row4 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V4L);
row4 = NormalizeAndRound(row4, off, max);
ref Vector<float> row5 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V5L);
row5 = NormalizeAndRound(row5, off, max);
ref Vector<float> row6 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V6L);
row6 = NormalizeAndRound(row6, off, max);
ref Vector<float> row7 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V7L);
row7 = NormalizeAndRound(row7, off, max);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> NormalizeAndRound(Vector<float> row, Vector<float> off, Vector<float> max)
{ {
if (Vector<float>.Count == 8 && Vector<int>.Count == 8) row += off;
{ row = Vector.Max(row, Vector<float>.Zero);
ref Vector<float> row0 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V0L); row = Vector.Min(row, max);
row0 = row0.FastRound(); return row.FastRound();
ref Vector<float> row1 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V1L);
row1 = row1.FastRound();
ref Vector<float> row2 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V2L);
row2 = row2.FastRound();
ref Vector<float> row3 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V3L);
row3 = row3.FastRound();
ref Vector<float> row4 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V4L);
row4 = row4.FastRound();
ref Vector<float> row5 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V5L);
row5 = row5.FastRound();
ref Vector<float> row6 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V6L);
row6 = row6.FastRound();
ref Vector<float> row7 = ref Unsafe.As<Vector4, Vector<float>>(ref this.V7L);
row7 = row7.FastRound();
}
else
{
this.RoundInplaceSlow();
}
} }
internal void RoundInplaceSlow() public void RoundInplace()
{ {
for (int i = 0; i < Size; i++) for (int i = 0; i < Size; i++)
{ {
@ -598,10 +603,5 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common
DebugGuard.MustBeLessThan(idx, Size, nameof(idx)); DebugGuard.MustBeLessThan(idx, Size, nameof(idx));
DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx)); DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx));
} }
[StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(float))]
private struct Row
{
}
} }
} }

129
src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegBlockPostProcessor.cs

@ -14,14 +14,24 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder
internal unsafe struct JpegBlockPostProcessor internal unsafe struct JpegBlockPostProcessor
{ {
/// <summary> /// <summary>
/// The <see cref="ComputationData"/> /// Source block
/// </summary> /// </summary>
private ComputationData data; public Block8x8F SourceBlock;
/// <summary> /// <summary>
/// Pointers to elements of <see cref="data"/> /// Temporary block 1 to store intermediate and/or final computation results
/// </summary> /// </summary>
private DataPointers pointers; public Block8x8F WorkspaceBlock1;
/// <summary>
/// Temporary block 2 to store intermediate and/or final computation results
/// </summary>
public Block8x8F WorkspaceBlock2;
/// <summary>
/// The quantization table as <see cref="Block8x8F"/>
/// </summary>
public Block8x8F DequantiazationTable;
private Size subSamplingDivisors; private Size subSamplingDivisors;
@ -30,11 +40,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder
/// </summary> /// </summary>
public static void Init(JpegBlockPostProcessor* postProcessor, IRawJpegData decoder, IJpegComponent component) public static void Init(JpegBlockPostProcessor* postProcessor, IRawJpegData decoder, IJpegComponent component)
{ {
postProcessor->data = ComputationData.Create();
postProcessor->pointers = new DataPointers(&postProcessor->data);
int qtIndex = component.QuantizationTableIndex; int qtIndex = component.QuantizationTableIndex;
postProcessor->data.DequantiazationTable = ZigZag.CreateDequantizationTable(ref decoder.QuantizationTables[qtIndex]); postProcessor->DequantiazationTable = ZigZag.CreateDequantizationTable(ref decoder.QuantizationTables[qtIndex]);
postProcessor->subSamplingDivisors = component.SubSamplingDivisors; postProcessor->subSamplingDivisors = component.SubSamplingDivisors;
} }
@ -42,110 +49,28 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder
ref Block8x8 sourceBlock, ref Block8x8 sourceBlock,
BufferArea<float> destArea) BufferArea<float> destArea)
{ {
sourceBlock.CopyToFloatBlock(ref this.data.SourceBlock); ref Block8x8F b = ref this.SourceBlock;
sourceBlock.CopyToFloatBlock(ref b);
Block8x8F* b = this.pointers.SourceBlock;
// Dequantize: // Dequantize:
b->MultiplyInplace(ref this.data.DequantiazationTable); b.MultiplyInplace(ref this.DequantiazationTable);
FastFloatingPointDCT.TransformIDCT(ref *b, ref this.data.WorkspaceBlock1, ref this.data.WorkspaceBlock2); FastFloatingPointDCT.TransformIDCT(ref b, ref this.WorkspaceBlock1, ref this.WorkspaceBlock2);
this.data.WorkspaceBlock1.NormalizeColorsInplace();
// To conform better to libjpeg we actually NEED TO lose precision here. // To conform better to libjpeg we actually NEED TO lose precision here.
// This is because they store blocks as Int16 between all the operations. // This is because they store blocks as Int16 between all the operations.
// Unfortunately, we need to emulate this to be "more accurate" :( // To be "more accurate", we need to emulate this by rounding!
this.data.WorkspaceBlock1.RoundInplace(); if (SimdUtils.IsAvx2CompatibleArchitecture)
this.data.WorkspaceBlock1.CopyTo(destArea, this.subSamplingDivisors.Width, this.subSamplingDivisors.Height);
}
/// <summary>
/// Holds the "large" data blocks needed for computations.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct ComputationData
{
/// <summary>
/// Source block
/// </summary>
public Block8x8F SourceBlock;
/// <summary>
/// Temporal block 1 to store intermediate and/or final computation results
/// </summary>
public Block8x8F WorkspaceBlock1;
/// <summary>
/// Temporal block 2 to store intermediate and/or final computation results
/// </summary>
public Block8x8F WorkspaceBlock2;
/// <summary>
/// The quantization table as <see cref="Block8x8F"/>
/// </summary>
public Block8x8F DequantiazationTable;
/// <summary>
/// The jpeg unzig data
/// </summary>
public ZigZag Unzig;
/// <summary>
/// Creates and initializes a new <see cref="ComputationData"/> instance
/// </summary>
/// <returns>The <see cref="ComputationData"/></returns>
public static ComputationData Create()
{ {
var data = default(ComputationData); this.WorkspaceBlock1.NormalizeColorsAndRoundInplaceAvx2();
data.Unzig = ZigZag.CreateUnzigTable();
return data;
} }
} else
/// <summary>
/// Contains pointers to the memory regions of <see cref="ComputationData"/> so they can be easily passed around to pointer based utility methods of <see cref="Block8x8F"/>
/// </summary>
public struct DataPointers
{
/// <summary>
/// Pointer to <see cref="ComputationData.SourceBlock"/>
/// </summary>
public Block8x8F* SourceBlock;
/// <summary>
/// Pointer to <see cref="ComputationData.WorkspaceBlock1"/>
/// </summary>
public Block8x8F* WorkspaceBlock1;
/// <summary>
/// Pointer to <see cref="ComputationData.WorkspaceBlock2"/>
/// </summary>
public Block8x8F* WorkspaceBlock2;
/// <summary>
/// Pointer to <see cref="ComputationData.DequantiazationTable"/>
/// </summary>
public Block8x8F* DequantiazationTable;
/// <summary>
/// Pointer to <see cref="ComputationData.Unzig"/> as int*
/// </summary>
public int* Unzig;
/// <summary>
/// Initializes a new instance of the <see cref="DataPointers" /> struct.
/// </summary>
/// <param name="dataPtr">Pointer to <see cref="ComputationData"/></param>
internal DataPointers(ComputationData* dataPtr)
{ {
this.SourceBlock = &dataPtr->SourceBlock; this.WorkspaceBlock1.NormalizeColorsInplace();
this.WorkspaceBlock1 = &dataPtr->WorkspaceBlock1; this.WorkspaceBlock1.RoundInplace();
this.WorkspaceBlock2 = &dataPtr->WorkspaceBlock2;
this.DequantiazationTable = &dataPtr->DequantiazationTable;
this.Unzig = dataPtr->Unzig.Data;
} }
this.WorkspaceBlock1.CopyTo(destArea, this.subSamplingDivisors.Width, this.subSamplingDivisors.Height);
} }
} }
} }

4
src/ImageSharp/Formats/Jpeg/Common/Decoder/JpegColorConverter.FromYCbCr.cs

@ -123,12 +123,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common.Decoder
if (Vector<float>.Count == 4) if (Vector<float>.Count == 4)
{ {
// TODO: Find a way to properly run & test this path on modern AVX2 PC-s! (Have I already mentioned that Vector<T> is terrible?) // TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector<T> is terrible?)
r.RoundAndDownscaleBasic(); r.RoundAndDownscaleBasic();
g.RoundAndDownscaleBasic(); g.RoundAndDownscaleBasic();
b.RoundAndDownscaleBasic(); b.RoundAndDownscaleBasic();
} }
else if (Vector<float>.Count == 8) else if (SimdUtils.IsAvx2CompatibleArchitecture)
{ {
r.RoundAndDownscaleAvx2(); r.RoundAndDownscaleAvx2();
g.RoundAndDownscaleAvx2(); g.RoundAndDownscaleAvx2();

3
src/ImageSharp/Formats/Jpeg/Common/FastFloatingPointDCT.cs

@ -50,7 +50,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Common
/// <param name="temp">Temporary block provided by the caller</param> /// <param name="temp">Temporary block provided by the caller</param>
public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp) public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp)
{ {
// TODO: Transpose is a bottleneck now. We need full AVX support to optimize it:
// https://github.com/dotnet/corefx/issues/22940
src.TransposeInto(ref temp); src.TransposeInto(ref temp);
IDCT8x4_LeftPart(ref temp, ref dest); IDCT8x4_LeftPart(ref temp, ref dest);
IDCT8x4_RightPart(ref temp, ref dest); IDCT8x4_RightPart(ref temp, ref dest);

26
tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

@ -297,6 +297,30 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
} }
} }
/// <summary>
/// Compares the fused <c>NormalizeColorsAndRoundInplaceAvx2()</c> call against the two-step
/// reference sequence <c>NormalizeColorsInplace()</c> + <c>RoundInplace()</c> on the same
/// randomly generated input block, passing 0 as the last argument of <c>CompareBlocks</c>.
/// When <c>SimdUtils.IsAvx2CompatibleArchitecture</c> is false the test only logs a message
/// and performs no comparison.
/// </summary>
/// <param name="seed">Seed forwarded to <c>CreateRandomFloatBlock</c>.</param>
[Theory]
[InlineData(1)]
[InlineData(2)]
public void NormalizeColorsAndRoundAvx2(int seed)
{
    if (SimdUtils.IsAvx2CompatibleArchitecture)
    {
        Block8x8F input = CreateRandomFloatBlock(-200, 200, seed);

        // Reference result: normalize first, then round, as two separate passes.
        Block8x8F reference = input;
        reference.NormalizeColorsInplace();
        reference.RoundInplace();

        // Result of the single fused call under test, starting from the same input.
        Block8x8F fused = input;
        fused.NormalizeColorsAndRoundInplaceAvx2();

        // Log both blocks (reference first) before comparing them.
        this.Output.WriteLine(reference.ToString());
        this.Output.WriteLine(fused.ToString());

        this.CompareBlocks(reference, fused, 0);
    }
    else
    {
        this.Output.WriteLine("AVX2 not supported, skipping!");
    }
}
[Theory] [Theory]
[InlineData(1)] [InlineData(1)]
@ -352,7 +376,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[InlineData(1)] [InlineData(1)]
[InlineData(2)] [InlineData(2)]
[InlineData(3)] [InlineData(3)]
public void RoundInplace(int seed) public void RoundInplaceSlow(int seed)
{ {
Block8x8F s = CreateRandomFloatBlock(-500, 500, seed); Block8x8F s = CreateRandomFloatBlock(-500, 500, seed);

Loading…
Cancel
Save