diff --git a/src/ImageSharp.Formats.Jpeg/Components/Block8x8F.cs b/src/ImageSharp.Formats.Jpeg/Components/Block8x8F.cs
index e21ba2d02..ec0feaf45 100644
--- a/src/ImageSharp.Formats.Jpeg/Components/Block8x8F.cs
+++ b/src/ImageSharp.Formats.Jpeg/Components/Block8x8F.cs
@@ -327,10 +327,11 @@ namespace ImageSharp.Formats.Jpg
}
///
- /// Unzig the elements of src into dest, while dividing them by elements of qt and rounding the values
+ /// Unzig the elements of src into dest, while dividing them by elements of qt and rounding the values.
+ /// Store the result in the memory area pointed to by dest.
///
/// Source block
- /// Destination block
+ /// Destination block of integers
/// Quantization table
/// Pointer to elements
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -349,6 +350,31 @@ namespace ImageSharp.Formats.Jpg
}
}
+ /// <summary>
+ /// Copies the elements of <paramref name="block"/> into <paramref name="dest"/> in the order given by
+ /// <paramref name="unzigPtr"/>, then divides them by the elements of <paramref name="qt"/> and "pre-rounds" them.
+ /// </summary>
+ /// <param name="block">Source block</param>
+ /// <param name="dest">Destination block; an (int)-cast of each element completes the rounding</param>
+ /// <param name="qt">The quantization table, applied element by element in the order of <paramref name="dest"/></param>
+ /// <param name="unzigPtr">Pointer to the index table: entry i is the source index read for destination element i</param>
+ public static unsafe void UnzigDivRound(
+     Block8x8F* block,
+     Block8x8F* dest,
+     Block8x8F* qt,
+     int* unzigPtr)
+ {
+     float* source = (float*)block;
+     float* target = (float*)dest;
+
+     for (int i = 0; i < ScalarCount; i++)
+     {
+         target[i] = source[unzigPtr[i]];
+     }
+
+     DivideRoundAll(ref *dest, ref *qt);
+ }
+
///
/// Scales the 16x16 region represented by the 4 source blocks to the 8x8 DST block.
///
@@ -391,5 +417,35 @@ namespace ImageSharp.Formats.Jpg
return -((-dividend + (divisor >> 1)) / divisor);
}
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b)
+ {
+ a.V0L = DivideRound(a.V0L, b.V0L); // Every V*L / V*R member of a is overwritten in place with DivideRound(member of a, same member of b).
+ a.V0R = DivideRound(a.V0R, b.V0R);
+ a.V1L = DivideRound(a.V1L, b.V1L);
+ a.V1R = DivideRound(a.V1R, b.V1R);
+ a.V2L = DivideRound(a.V2L, b.V2L);
+ a.V2R = DivideRound(a.V2R, b.V2R);
+ a.V3L = DivideRound(a.V3L, b.V3L);
+ a.V3R = DivideRound(a.V3R, b.V3R);
+ a.V4L = DivideRound(a.V4L, b.V4L);
+ a.V4R = DivideRound(a.V4R, b.V4R);
+ a.V5L = DivideRound(a.V5L, b.V5L);
+ a.V5R = DivideRound(a.V5R, b.V5R);
+ a.V6L = DivideRound(a.V6L, b.V6L);
+ a.V6R = DivideRound(a.V6R, b.V6R);
+ a.V7L = DivideRound(a.V7L, b.V7L);
+ a.V7R = DivideRound(a.V7R, b.V7R); // b is only read by this method, never written.
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor)
+ {
+     // clamp(dividend, -1, 1) is +1/-1 per lane once |dividend| >= 1; adding half of it "pre-rounds" the quotient.
+     Vector4 clampedSign = Vector4.Max(Vector4.Min(dividend, Vector4.One), new Vector4(-1));
+
+     return (dividend / divisor) + (clampedSign * new Vector4(0.5f));
+ }
}
}
\ No newline at end of file
diff --git a/src/ImageSharp.Formats.Jpeg/JpegEncoderCore.cs b/src/ImageSharp.Formats.Jpeg/JpegEncoderCore.cs
index aa7f2495d..b0e442eaa 100644
--- a/src/ImageSharp.Formats.Jpeg/JpegEncoderCore.cs
+++ b/src/ImageSharp.Formats.Jpeg/JpegEncoderCore.cs
@@ -452,8 +452,6 @@ namespace ImageSharp.Formats
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
- int* unzigDest = stackalloc int[Block8x8F.ScalarCount];
-
using (PixelArea rgbBytes = new PixelArea(8, 8, ComponentOrder.Xyz))
{
for (int y = 0; y < pixels.Height; y += 8)
@@ -467,7 +465,6 @@ namespace ImageSharp.Formats
prevDCY,
&b,
&temp1,
- unzigDest,
&temp2,
&onStackLuminanceQuantTable,
unzig.Data);
@@ -476,7 +473,6 @@ namespace ImageSharp.Formats
prevDCCb,
&cb,
&temp1,
- unzigDest,
&temp2,
&onStackChrominanceQuantTable,
unzig.Data);
@@ -485,7 +481,6 @@ namespace ImageSharp.Formats
prevDCCr,
&cr,
&temp1,
- unzigDest,
&temp2,
&onStackChrominanceQuantTable,
unzig.Data);
@@ -542,9 +537,8 @@ namespace ImageSharp.Formats
/// The quantization table index.
/// The previous DC value.
/// Source block
- /// Temporal block to be used as FDCT Destination
- /// Working buffer for unzigged stuff
- /// Temporal block 2
+ /// Temporary block to be used as FDCT destination
+ /// Temporary block 2; receives the unzigged, pre-rounded coefficients
/// Quantization table
/// The 8x8 Unzig block pointer
///
@@ -554,19 +548,19 @@ namespace ImageSharp.Formats
QuantIndex index,
int prevDC,
Block8x8F* src,
- Block8x8F* tempDest,
- int* d,
- Block8x8F* tempWorker,
+ Block8x8F* tempDest1,
+ Block8x8F* tempDest2,
Block8x8F* quant,
int* unzigPtr)
{
- DCT.TransformFDCT(ref *src, ref *tempDest, ref *tempWorker);
+ DCT.TransformFDCT(ref *src, ref *tempDest1, ref *tempDest2);
- Block8x8F.UnZigDivRound(tempDest, d, quant, unzigPtr);
+ Block8x8F.UnzigDivRound(tempDest1, tempDest2, quant, unzigPtr);
+ float* unziggedDestPtr = (float*)tempDest2;
- // Emit the DC delta.
- int dc = d[0];
+ int dc = (int)unziggedDestPtr[0];
+ // Emit the DC delta.
this.EmitHuffRLE((HuffIndex)((2 * (int)index) + 0), 0, dc - prevDC);
// Emit the AC components.
@@ -575,7 +569,7 @@ namespace ImageSharp.Formats
for (int zig = 1; zig < Block8x8F.ScalarCount; zig++)
{
- int ac = d[zig];
+ int ac = (int)unziggedDestPtr[zig];
if (ac == 0)
{
@@ -823,8 +817,6 @@ namespace ImageSharp.Formats
UnzigData unzig = UnzigData.Create();
- int* unzigDest = stackalloc int[Block8x8F.ScalarCount];
-
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
@@ -846,7 +838,6 @@ namespace ImageSharp.Formats
prevDCY,
&b,
&temp1,
- unzigDest,
&temp2,
&onStackLuminanceQuantTable,
unzig.Data);
@@ -858,7 +849,6 @@ namespace ImageSharp.Formats
prevDCCb,
&b,
&temp1,
- unzigDest,
&temp2,
&onStackChrominanceQuantTable,
unzig.Data);
@@ -869,7 +859,6 @@ namespace ImageSharp.Formats
prevDCCr,
&b,
&temp1,
- unzigDest,
&temp2,
&onStackChrominanceQuantTable,
unzig.Data);
diff --git a/tests/ImageSharp.Benchmarks/General/RoundSinglePrecisionBlocks.cs b/tests/ImageSharp.Benchmarks/General/RoundSinglePrecisionBlocks.cs
index 8c104dff0..52880168d 100644
--- a/tests/ImageSharp.Benchmarks/General/RoundSinglePrecisionBlocks.cs
+++ b/tests/ImageSharp.Benchmarks/General/RoundSinglePrecisionBlocks.cs
@@ -96,7 +96,6 @@
Block8x8F bDividend = this.inputDividend;
Block8x8F bDivisor = this.inputDivisior;
float* pDividend = (float*)&bDividend;
- float* pDivisor = (float*)&bDivisor;
for (int cnt = 0; cnt < ExecutionCount; cnt++)
{
diff --git a/tests/ImageSharp.Benchmarks/General/Vector4Constants.cs b/tests/ImageSharp.Benchmarks/General/Vector4Constants.cs
new file mode 100644
index 000000000..a7afa336e
--- /dev/null
+++ b/tests/ImageSharp.Benchmarks/General/Vector4Constants.cs
@@ -0,0 +1,61 @@
+namespace ImageSharp.Benchmarks.General
+{
+    using System;
+    using System.Numerics;
+
+    using BenchmarkDotNet.Attributes;
+
+    /// <summary>
+    /// Does storing SIMD constants in static readonly fields have any effect on performance, or is it OK to always
+    /// construct them inline? Spoiler: the difference seems to be statistically insignificant!
+    /// </summary>
+    public class Vector4Constants
+    {
+        private static readonly Vector4 A = new Vector4(1.2f);
+        private static readonly Vector4 B = new Vector4(3.4f);
+        private static readonly Vector4 C = new Vector4(5.6f);
+        private static readonly Vector4 D = new Vector4(7.8f);
+
+        private Random random;
+
+        private Vector4 parameter;
+
+        [Setup]
+        public void Setup()
+        {
+            // Fixed seed, so the input vector is built from the same pseudo-random sequence on every run.
+            this.random = new Random(42);
+            this.parameter = new Vector4(
+                this.GetRandomFloat(),
+                this.GetRandomFloat(),
+                this.GetRandomFloat(),
+                this.GetRandomFloat());
+        }
+
+        [Benchmark(Baseline = true)]
+        public Vector4 Static()
+        {
+            Vector4 p = this.parameter;
+
+            Vector4 x = (p * A / B) + (p * C / D);
+            Vector4 y = (p / A * B) + (p / C * D);
+            Vector4 z = Vector4.Min(p, A);
+            Vector4 w = Vector4.Max(p, B);
+            return x + y + z + w;
+        }
+
+        [Benchmark]
+        public Vector4 Inlined()
+        {
+            Vector4 p = this.parameter;
+            // Same values as A, B, C and D, so both benchmarks perform identical arithmetic; only the constant storage differs.
+            Vector4 x = (p * new Vector4(1.2f) / new Vector4(3.4f)) + (p * new Vector4(5.6f) / new Vector4(7.8f));
+            Vector4 y = (p / new Vector4(1.2f) * new Vector4(3.4f)) + (p / new Vector4(5.6f) * new Vector4(7.8f));
+            Vector4 z = Vector4.Min(p, new Vector4(1.2f));
+            Vector4 w = Vector4.Max(p, new Vector4(3.4f));
+            return x + y + z + w;
+        }
+
+        private float GetRandomFloat() => (float)this.random.NextDouble();
+    }
+}
\ No newline at end of file
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
index 1f055bab4..690a8b620 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
@@ -12,6 +12,8 @@ namespace ImageSharp.Tests
{
using System.Diagnostics;
using System.Numerics;
+
+ using ImageSharp.Formats;
using ImageSharp.Formats.Jpg;
using Xunit;
@@ -431,5 +433,34 @@ namespace ImageSharp.Tests
Assert.Equal(actualDest.Data, expectedDest.Data, new ApproximateFloatComparer(1f));
}
+
+ [Theory]
+ [InlineData(1)]
+ [InlineData(2)]
+ public unsafe void UnzigDivRound(int seed)
+ {
+     // Arrange: the dividend block and the quantization table are both filled using the same range and seed.
+     Block8x8F source = new Block8x8F();
+     source.LoadFrom(Create8x8RandomFloatData(-2000, 2000, seed));
+
+     Block8x8F quant = new Block8x8F();
+     quant.LoadFrom(Create8x8RandomFloatData(-2000, 2000, seed));
+
+     UnzigData unzig = UnzigData.Create();
+
+     // Act: run the integer reference implementation first, then the implementation under test.
+     int* expected = stackalloc int[Block8x8F.ScalarCount];
+     ReferenceImplementations.UnZigDivRoundRational(&source, expected, &quant, unzig.Data);
+
+     Block8x8F actual = default(Block8x8F);
+     Block8x8F.UnzigDivRound(&source, &actual, &quant, unzig.Data);
+
+     // Assert: an (int)-cast of every pre-rounded value must equal the reference result.
+     for (int i = 0; i < Block8x8F.ScalarCount; i++)
+     {
+         int actualValue = (int)actual[i];
+         Assert.Equal(expected[i], actualValue);
+     }
+ }
}
}
\ No newline at end of file
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs
index 60c136674..06882719c 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs
@@ -875,5 +875,31 @@ namespace ImageSharp.Tests
}
}
}
+
+ /// <summary>Integer reference: truncates both operands to int, then divides them with RationalRound().</summary>
+ public static unsafe void UnZigDivRoundRational(Block8x8F* src, int* dest, Block8x8F* qt, int* unzigPtr)
+ {
+     float* source = (float*)src;
+     float* quant = (float*)qt;
+
+     for (int i = 0; i < Block8x8F.ScalarCount; i++)
+     {
+         // The source is read through the unzig table; the quantization table is read in output order.
+         int dividend = (int)source[unzigPtr[i]];
+         int divisor = (int)quant[i];
+         dest[i] = RationalRound(dividend, divisor);
+     }
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static int RationalRound(int dividend, int divisor)
+ {
+     // Integer division that adds half the divisor to the dividend's magnitude before dividing,
+     // then restores the dividend's sign (rounds half away from zero for a positive divisor).
+     int half = divisor >> 1;
+     return dividend >= 0
+         ? (dividend + half) / divisor
+         : -((-dividend + half) / divisor);
+ }
}
}
\ No newline at end of file