Browse Source

Optimized UnzigDivRound

pull/84/head
Anton Firszov 9 years ago
parent
commit
6baff43cb7
  1. 60
      src/ImageSharp.Formats.Jpeg/Components/Block8x8F.cs
  2. 31
      src/ImageSharp.Formats.Jpeg/JpegEncoderCore.cs
  3. 1
      tests/ImageSharp.Benchmarks/General/RoundSinglePrecisionBlocks.cs
  4. 61
      tests/ImageSharp.Benchmarks/General/Vector4Constants.cs
  5. 31
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
  6. 26
      tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs

60
src/ImageSharp.Formats.Jpeg/Components/Block8x8F.cs

@ -327,10 +327,11 @@ namespace ImageSharp.Formats.Jpg
}
/// <summary>
/// Unzig the elements of src into dest, while dividing them by elements of qt and rounding the values
/// Unzig the elements of src into dest, while dividing them by elements of qt and rounding the values.
/// Store the result in the memory area pointed to by dest.
/// </summary>
/// <param name="src">Source block</param>
/// <param name="dest">Destination block</param>
/// <param name="dest">Destination block of integers</param>
/// <param name="qt">Quantization table</param>
/// <param name="unzigPtr">Pointer to <see cref="UnzigData"/> elements</param>
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
@ -349,6 +350,31 @@ namespace ImageSharp.Formats.Jpg
}
}
/// <summary>
/// Copies the elements of <paramref name="block"/> into <paramref name="dest"/> in the order given by
/// <paramref name="unzigPtr"/>, then divides them by the elements of <paramref name="qt"/> and "pre-rounds" them.
/// An (int)-cast of each resulting value is enough to finish the rounding.
/// </summary>
/// <param name="block">Source block</param>
/// <param name="dest">Destination block; overwritten with the pre-rounded quotients</param>
/// <param name="qt">The quantization table</param>
/// <param name="unzigPtr">Pointer to elements of <see cref="UnzigData"/></param>
public static unsafe void UnzigDivRound(
    Block8x8F* block,
    Block8x8F* dest,
    Block8x8F* qt,
    int* unzigPtr)
{
    float* source = (float*)block;
    float* target = (float*)dest;

    // Gather step: target[i] receives the source element whose index is stored in unzigPtr[i].
    int index = 0;
    while (index < ScalarCount)
    {
        target[index] = source[unzigPtr[index]];
        index++;
    }

    // Divide by the quantization table and add the rounding offset, in place on dest.
    DivideRoundAll(ref *dest, ref *qt);
}
/// <summary>
/// Scales the 16x16 region represented by the 4 source blocks to the 8x8 DST block.
/// </summary>
@ -391,5 +417,35 @@ namespace ImageSharp.Formats.Jpg
return -((-dividend + (divisor >> 1)) / divisor);
}
/// <summary>
/// Applies <see cref="DivideRound(Vector4, Vector4)"/> to every field pair of the two blocks,
/// storing each result back into the corresponding field of <paramref name="a"/>.
/// </summary>
/// <param name="a">Block holding the dividends; overwritten with the results</param>
/// <param name="b">Block holding the divisors; only read</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b)
{
// Fully unrolled: one statement per field (V0..V7, each with an L and an R part).
// Every statement reads and writes only its own field of a, so the statements are independent.
a.V0L = DivideRound(a.V0L, b.V0L);
a.V0R = DivideRound(a.V0R, b.V0R);
a.V1L = DivideRound(a.V1L, b.V1L);
a.V1R = DivideRound(a.V1R, b.V1R);
a.V2L = DivideRound(a.V2L, b.V2L);
a.V2R = DivideRound(a.V2R, b.V2R);
a.V3L = DivideRound(a.V3L, b.V3L);
a.V3R = DivideRound(a.V3R, b.V3R);
a.V4L = DivideRound(a.V4L, b.V4L);
a.V4R = DivideRound(a.V4R, b.V4R);
a.V5L = DivideRound(a.V5L, b.V5L);
a.V5R = DivideRound(a.V5R, b.V5R);
a.V6L = DivideRound(a.V6L, b.V6L);
a.V6R = DivideRound(a.V6R, b.V6R);
a.V7L = DivideRound(a.V7L, b.V7L);
a.V7R = DivideRound(a.V7R, b.V7R);
}
/// <summary>
/// Divides <paramref name="dividend"/> by <paramref name="divisor"/> lane by lane and adds a rounding offset
/// of 0.5 times the dividend clamped to [-1, 1].
/// For dividends with a magnitude of at least 1 the offset is +0.5 or -0.5, matching the dividend's sign.
/// </summary>
/// <param name="dividend">The values to divide</param>
/// <param name="divisor">The values to divide by</param>
/// <returns>The quotient plus the rounding offset; the result is not truncated here</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor)
{
    // Min with 1 first, then Max with -1: the clamped dividend stands in for its sign.
    Vector4 clamped = Vector4.Max(Vector4.Min(dividend, Vector4.One), new Vector4(-1));
    Vector4 offset = clamped * new Vector4(0.5f);
    Vector4 quotient = dividend / divisor;

    return quotient + offset;
}
}
}

31
src/ImageSharp.Formats.Jpeg/JpegEncoderCore.cs

@ -452,8 +452,6 @@ namespace ImageSharp.Formats
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
int* unzigDest = stackalloc int[Block8x8F.ScalarCount];
using (PixelArea<TColor> rgbBytes = new PixelArea<TColor>(8, 8, ComponentOrder.Xyz))
{
for (int y = 0; y < pixels.Height; y += 8)
@ -467,7 +465,6 @@ namespace ImageSharp.Formats
prevDCY,
&b,
&temp1,
unzigDest,
&temp2,
&onStackLuminanceQuantTable,
unzig.Data);
@ -476,7 +473,6 @@ namespace ImageSharp.Formats
prevDCCb,
&cb,
&temp1,
unzigDest,
&temp2,
&onStackChrominanceQuantTable,
unzig.Data);
@ -485,7 +481,6 @@ namespace ImageSharp.Formats
prevDCCr,
&cr,
&temp1,
unzigDest,
&temp2,
&onStackChrominanceQuantTable,
unzig.Data);
@ -542,9 +537,8 @@ namespace ImageSharp.Formats
/// <param name="index">The quantization table index.</param>
/// <param name="prevDC">The previous DC value.</param>
/// <param name="src">Source block</param>
/// <param name="tempDest">Temporal block to be used as FDCT Destination</param>
/// <param name="d">Working buffer for unzigged stuff</param>
/// <param name="tempWorker">Temporal block 2</param>
/// <param name="tempDest1">Temporary block to be used as FDCT Destination</param>
/// <param name="tempDest2">Temporary block 2</param>
/// <param name="quant">Quantization table</param>
/// <param name="unzigPtr">The 8x8 Unzig block pointer</param>
/// <returns>
@ -554,19 +548,19 @@ namespace ImageSharp.Formats
QuantIndex index,
int prevDC,
Block8x8F* src,
Block8x8F* tempDest,
int* d,
Block8x8F* tempWorker,
Block8x8F* tempDest1,
Block8x8F* tempDest2,
Block8x8F* quant,
int* unzigPtr)
{
DCT.TransformFDCT(ref *src, ref *tempDest, ref *tempWorker);
DCT.TransformFDCT(ref *src, ref *tempDest1, ref *tempDest2);
Block8x8F.UnZigDivRound(tempDest, d, quant, unzigPtr);
Block8x8F.UnzigDivRound(tempDest1, tempDest2, quant, unzigPtr);
float* unziggedDestPtr = (float*)tempDest2;
// Emit the DC delta.
int dc = d[0];
int dc = (int)unziggedDestPtr[0];
// Emit the DC delta.
this.EmitHuffRLE((HuffIndex)((2 * (int)index) + 0), 0, dc - prevDC);
// Emit the AC components.
@ -575,7 +569,7 @@ namespace ImageSharp.Formats
for (int zig = 1; zig < Block8x8F.ScalarCount; zig++)
{
int ac = d[zig];
int ac = (int)unziggedDestPtr[zig];
if (ac == 0)
{
@ -823,8 +817,6 @@ namespace ImageSharp.Formats
UnzigData unzig = UnzigData.Create();
int* unzigDest = stackalloc int[Block8x8F.ScalarCount];
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
@ -846,7 +838,6 @@ namespace ImageSharp.Formats
prevDCY,
&b,
&temp1,
unzigDest,
&temp2,
&onStackLuminanceQuantTable,
unzig.Data);
@ -858,7 +849,6 @@ namespace ImageSharp.Formats
prevDCCb,
&b,
&temp1,
unzigDest,
&temp2,
&onStackChrominanceQuantTable,
unzig.Data);
@ -869,7 +859,6 @@ namespace ImageSharp.Formats
prevDCCr,
&b,
&temp1,
unzigDest,
&temp2,
&onStackChrominanceQuantTable,
unzig.Data);

1
tests/ImageSharp.Benchmarks/General/RoundSinglePrecisionBlocks.cs

@ -96,7 +96,6 @@
Block8x8F bDividend = this.inputDividend;
Block8x8F bDivisor = this.inputDivisior;
float* pDividend = (float*)&bDividend;
float* pDivisor = (float*)&bDivisor;
for (int cnt = 0; cnt < ExecutionCount; cnt++)
{

61
tests/ImageSharp.Benchmarks/General/Vector4Constants.cs

@ -0,0 +1,61 @@
namespace ImageSharp.Benchmarks.General
{
    using System;
    using System.Numerics;

    using BenchmarkDotNet.Attributes;

    /// <summary>
    /// Does storing SIMD constants in static readonly fields have any effect on performance,
    /// or is it OK to always construct them inline?
    /// Spoiler: the difference seems to be statistically insignificant!
    /// </summary>
    public class Vector4Constants
    {
        private static readonly Vector4 A = new Vector4(1.2f);

        private static readonly Vector4 B = new Vector4(3.4f);

        private static readonly Vector4 C = new Vector4(5.6f);

        private static readonly Vector4 D = new Vector4(7.8f);

        private Random random = null;

        private Vector4 parameter;

        /// <summary>
        /// Fills <see cref="parameter"/> with four pseudo-random values drawn from a fixed-seed generator.
        /// </summary>
        [Setup]
        public void Setup()
        {
            this.random = new Random(42);
            this.parameter = new Vector4(
                this.GetRandomFloat(),
                this.GetRandomFloat(),
                this.GetRandomFloat(),
                this.GetRandomFloat()
                );
        }

        /// <summary>
        /// Baseline: evaluates the expression using the static readonly constants.
        /// </summary>
        /// <returns>The sum of the four intermediate vectors</returns>
        [Benchmark(Baseline = true)]
        public Vector4 Static()
        {
            Vector4 p = this.parameter;

            Vector4 x = p * A / B + p * C / D;
            Vector4 y = p / A * B + p / C * D;
            Vector4 z = Vector4.Min(p, A);
            Vector4 w = Vector4.Max(p, B);
            return x + y + z + w;
        }

        /// <summary>
        /// Evaluates the same expression as <see cref="Static"/>, constructing every constant inline.
        /// The literal values must stay equal to A, B, C and D so both benchmarks do identical arithmetic.
        /// </summary>
        /// <returns>The sum of the four intermediate vectors</returns>
        [Benchmark]
        public Vector4 Inlined()
        {
            Vector4 p = this.parameter;

            Vector4 x = p * new Vector4(1.2f) / new Vector4(3.4f) + p * new Vector4(5.6f) / new Vector4(7.8f);
            Vector4 y = p / new Vector4(1.2f) * new Vector4(3.4f) + p / new Vector4(5.6f) * new Vector4(7.8f);
            Vector4 z = Vector4.Min(p, new Vector4(1.2f));
            Vector4 w = Vector4.Max(p, new Vector4(3.4f));
            return x + y + z + w;
        }

        private float GetRandomFloat() => (float)this.random.NextDouble();
    }
}

31
tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

@ -12,6 +12,8 @@ namespace ImageSharp.Tests
{
using System.Diagnostics;
using System.Numerics;
using ImageSharp.Formats;
using ImageSharp.Formats.Jpg;
using Xunit;
@ -431,5 +433,34 @@ namespace ImageSharp.Tests
Assert.Equal(actualDest.Data, expectedDest.Data, new ApproximateFloatComparer(1f));
}
/// <summary>
/// Checks that <see cref="Block8x8F.UnzigDivRound"/> followed by an (int)-cast yields the same values as
/// <see cref="ReferenceImplementations.UnZigDivRoundRational"/>.
/// </summary>
/// <param name="seed">Seed passed to the random data generator</param>
[Theory]
[InlineData(1)]
[InlineData(2)]
public unsafe void UnzigDivRound(int seed)
{
    // Both the source block and the quantization table are generated with the same range and seed.
    Block8x8F source = new Block8x8F();
    source.LoadFrom(Create8x8RandomFloatData(-2000, 2000, seed));

    Block8x8F quantTable = new Block8x8F();
    quantTable.LoadFrom(Create8x8RandomFloatData(-2000, 2000, seed));

    UnzigData unzig = UnzigData.Create();

    int* reference = stackalloc int[Block8x8F.ScalarCount];
    ReferenceImplementations.UnZigDivRoundRational(&source, reference, &quantTable, unzig.Data);

    Block8x8F result = default(Block8x8F);
    Block8x8F.UnzigDivRound(&source, &result, &quantTable, unzig.Data);

    // The optimized version only pre-rounds; the cast to int completes the rounding.
    for (int index = 0; index < Block8x8F.ScalarCount; index++)
    {
        Assert.Equal(reference[index], (int)result[index]);
    }
}
}
}

26
tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs

@ -875,5 +875,31 @@ namespace ImageSharp.Tests
}
}
}
/// <summary>
/// Integer reference implementation: for each position, truncates the unzigged source element and the
/// quantization element to <see cref="int"/>, divides them with <see cref="RationalRound"/> and stores the result.
/// </summary>
/// <param name="src">Source block</param>
/// <param name="dest">Destination buffer of integers, written at indices 0 to ScalarCount - 1</param>
/// <param name="qt">Quantization table</param>
/// <param name="unzigPtr">Pointer to the unzig index table</param>
public static unsafe void UnZigDivRoundRational(Block8x8F* src, int* dest, Block8x8F* qt, int* unzigPtr)
{
    float* coefficients = (float*)src;
    float* divisors = (float*)qt;

    for (int i = 0; i < Block8x8F.ScalarCount; i++)
    {
        // The source is read through the unzig table; the quantization table is read linearly.
        int dividend = (int)coefficients[unzigPtr[i]];
        int divisor = (int)divisors[i];

        dest[i] = RationalRound(dividend, divisor);
    }
}
/// <summary>
/// Integer division that adds half of the divisor to the magnitude of the dividend before dividing,
/// then restores the sign of the dividend.
/// </summary>
/// <param name="dividend">The value to divide</param>
/// <param name="divisor">The value to divide by</param>
/// <returns>The quotient computed as described above</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int RationalRound(int dividend, int divisor)
{
    int half = divisor >> 1;

    return dividend < 0
        ? -((-dividend + half) / divisor)
        : (dividend + half) / divisor;
}
}
}
Loading…
Cancel
Save