From 8c2325abe92d371acfdbbda4d30b5748e275b530 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 24 Dec 2016 02:30:17 +0100 Subject: [PATCH] (re) added DCT, Test & ReferenceImplementations changes --- .../Formats/Jpg/Components/Block8x8F.cs | 61 +- src/ImageSharp/Formats/Jpg/Components/DCT.cs | 395 +++++++++++++ .../Jpg/Components/MutableSpanExtensions.cs | 55 +- .../Formats/Jpg/Block8x8FTests.cs | 337 ++++++----- .../ImageSharp.Tests/Formats/Jpg/JpegTests.cs | 3 - .../Formats/Jpg/ReferenceImplementations.cs | 539 +++++++++++++++++- .../Jpg/ReferenceImplementationsTests.cs | 144 +++++ .../Formats/Jpg/UtilityTestClassBase.cs | 103 +++- .../Jpg/ReferenceImplementationsTests.cs | 144 +++++ .../ImageSharp.Tests46.csproj | 1 + 10 files changed, 1600 insertions(+), 182 deletions(-) create mode 100644 src/ImageSharp/Formats/Jpg/Components/DCT.cs create mode 100644 tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementationsTests.cs create mode 100644 tests/ImageSharp.Tests46/Formats/Jpg/ReferenceImplementationsTests.cs diff --git a/src/ImageSharp/Formats/Jpg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpg/Components/Block8x8F.cs index 8b86e3bd5d..35d6044f5a 100644 --- a/src/ImageSharp/Formats/Jpg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpg/Components/Block8x8F.cs @@ -160,28 +160,50 @@ namespace ImageSharp.Formats /// /// Multiply in place /// - /// Scalar to multiply by + /// Vector to multiply by [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void MultiplyAllInplace(Vector4 scalar) + public void MultiplyAllInplace(Vector4 scaleVec) { - this.V0L *= scalar; - this.V0R *= scalar; - this.V1L *= scalar; - this.V1R *= scalar; - this.V2L *= scalar; - this.V2R *= scalar; - this.V3L *= scalar; - this.V3R *= scalar; - this.V4L *= scalar; - this.V4R *= scalar; - this.V5L *= scalar; - this.V5R *= scalar; - this.V6L *= scalar; - this.V6R *= scalar; - this.V7L *= scalar; - this.V7R *= scalar; + this.V0L *= scaleVec; + this.V0R *= scaleVec; + this.V1L *= scaleVec; + this.V1R *= scaleVec; + this.V2L *= scaleVec; + this.V2R *= scaleVec; + this.V3L *= scaleVec; + this.V3R *= scaleVec; + this.V4L *= scaleVec; + this.V4R *= scaleVec; + this.V5L *= scaleVec; + this.V5R *= scaleVec; + this.V6L *= scaleVec; + this.V6R *= scaleVec; + this.V7L *= scaleVec; + this.V7R *= scaleVec; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void AddToAllInplace(Vector4 diff) + { + this.V0L += diff; + this.V0R += diff; + this.V1L += diff; + this.V1R += diff; + this.V2L += diff; + this.V2R += diff; + this.V3L += diff; + this.V3R += diff; + this.V4L += diff; + this.V4R += diff; + this.V5L += diff; + this.V5R += diff; + this.V6L += diff; + this.V6R += diff; + this.V7L += diff; + this.V7R += diff; + } + + /// /// Apply floating point IDCT transformation into dest, using a temporary block 'temp' provided by the caller (optimization) /// @@ -206,7 +228,7 @@ namespace ImageSharp.Formats /// /// Block pointer /// Index - /// The scalar value at the specified index + /// The scaleVec value at the specified index [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static unsafe float GetScalarAt(Block8x8F* blockPtr, int idx) { @@ -420,6 +442,7 @@ namespace ImageSharp.Formats this.CopyTo(legacyBlock.Data); } + /// /// Level shift by +128, clip to [0, 255], and write to buffer. /// diff --git a/src/ImageSharp/Formats/Jpg/Components/DCT.cs b/src/ImageSharp/Formats/Jpg/Components/DCT.cs new file mode 100644 index 0000000000..a4763a8f59 --- /dev/null +++ b/src/ImageSharp/Formats/Jpg/Components/DCT.cs @@ -0,0 +1,395 @@ +// +// Copyright (c) James Jackson-South and contributors. +// Licensed under the Apache License, Version 2.0. +// +// ReSharper disable InconsistentNaming +namespace ImageSharp.Formats +{ + using System.Numerics; + using System.Runtime.CompilerServices; + + /// + /// Contains forward & inverse DCT implementations + /// + internal static class DCT + { + /// + /// Apply floating point IDCT transformation into dest, using a temporary block 'temp' provided by the caller (optimization) + /// + /// Source + /// Destination + /// Temporary block provided by the caller + public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp) + { + src.TransposeInto(ref temp); + IDCT8x4_LeftPart(ref temp, ref dest); + IDCT8x4_RightPart(ref temp, ref dest); + + dest.TransposeInto(ref temp); + + IDCT8x4_LeftPart(ref temp, ref dest); + IDCT8x4_RightPart(ref temp, ref dest); + + dest.MultiplyAllInplace(C_0_125); + } + +#pragma warning disable SA1310 // FieldNamesMustNotContainUnderscore + private static readonly Vector4 C_1_175876 = new Vector4(1.175876f); + private static readonly Vector4 C_1_961571 = new Vector4(-1.961571f); + private static readonly Vector4 C_0_390181 = new Vector4(-0.390181f); + private static readonly Vector4 C_0_899976 = new Vector4(-0.899976f); + private static readonly Vector4 C_2_562915 = new Vector4(-2.562915f); + private static readonly Vector4 C_0_298631 = new Vector4(0.298631f); + private static readonly Vector4 C_2_053120 = new Vector4(2.053120f); + private static readonly Vector4 C_3_072711 = new Vector4(3.072711f); + private static readonly Vector4 C_1_501321 = new Vector4(1.501321f); + private static readonly Vector4 C_0_541196 = new Vector4(0.541196f); + private static readonly Vector4 C_1_847759 = new Vector4(-1.847759f); + private static readonly Vector4 C_0_765367 = new Vector4(0.765367f); + + private static readonly Vector4 C_0_125 = new Vector4(0.1250f); +#pragma warning restore SA1310 // FieldNamesMustNotContainUnderscore + private static readonly Vector4 InvSqrt2 = new Vector4(0.707107f); + + /// + /// Do IDCT internal operations on the left part of the block. Original source: + /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 + /// + /// Destination block + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void IDCT8x4_LeftPart(ref Block8x8F s, ref Block8x8F d) + { + Vector4 my1 = s.V1L; + Vector4 my7 = s.V7L; + Vector4 mz0 = my1 + my7; + + Vector4 my3 = s.V3L; + Vector4 mz2 = my3 + my7; + Vector4 my5 = s.V5L; + Vector4 mz1 = my3 + my5; + Vector4 mz3 = my1 + my5; + + Vector4 mz4 = ((mz0 + mz1) * C_1_175876); + + mz2 = (mz2 * C_1_961571) + mz4; + mz3 = (mz3 * C_0_390181) + mz4; + mz0 = mz0 * C_0_899976; + mz1 = mz1 * C_2_562915; + + Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2; + Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3; + Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2; + Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3; + + Vector4 my2 = s.V2L; + Vector4 my6 = s.V6L; + mz4 = (my2 + my6) * C_0_541196; + Vector4 my0 = s.V0L; + Vector4 my4 = s.V4L; + mz0 = my0 + my4; + mz1 = my0 - my4; + + mz2 = mz4 + (my6 * C_1_847759); + mz3 = mz4 + (my2 * C_0_765367); + + my0 = mz0 + mz3; + my3 = mz0 - mz3; + my1 = mz1 + mz2; + my2 = mz1 - mz2; + + d.V0L = my0 + mb0; + d.V7L = my0 - mb0; + d.V1L = my1 + mb1; + d.V6L = my1 - mb1; + d.V2L = my2 + mb2; + d.V5L = my2 - mb2; + d.V3L = my3 + mb3; + d.V4L = my3 - mb3; + } + + /// + /// Do IDCT internal operations on the right part of the block. + /// Original source: + /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void IDCT8x4_RightPart(ref Block8x8F s, ref Block8x8F d) + { + Vector4 my1 = s.V1R; + Vector4 my7 = s.V7R; + Vector4 mz0 = my1 + my7; + + Vector4 my3 = s.V3R; + Vector4 mz2 = my3 + my7; + Vector4 my5 = s.V5R; + Vector4 mz1 = my3 + my5; + Vector4 mz3 = my1 + my5; + + Vector4 mz4 = (mz0 + mz1) * C_1_175876; + + mz2 = (mz2 * C_1_961571) + mz4; + mz3 = (mz3 * C_0_390181) + mz4; + mz0 = mz0 * C_0_899976; + mz1 = mz1 * C_2_562915; + + Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2; + Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3; + Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2; + Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3; + + Vector4 my2 = s.V2R; + Vector4 my6 = s.V6R; + mz4 = (my2 + my6) * C_0_541196; + Vector4 my0 = s.V0R; + Vector4 my4 = s.V4R; + mz0 = my0 + my4; + mz1 = my0 - my4; + + mz2 = mz4 + (my6 * C_1_847759); + mz3 = mz4 + (my2 * C_0_765367); + + my0 = mz0 + mz3; + my3 = mz0 - mz3; + my1 = mz1 + mz2; + my2 = mz1 - mz2; + + d.V0R = my0 + mb0; + d.V7R = my0 - mb0; + d.V1R = my1 + mb1; + d.V6R = my1 - mb1; + d.V2R = my2 + mb2; + d.V5R = my2 - mb2; + d.V3R = my3 + mb3; + d.V4R = my3 - mb3; + } + + + /// + /// Original: + /// + /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L15 + /// + /// + /// Source + /// Destination + public static void FDCT8x4_LeftPart(ref Block8x8F s, ref Block8x8F d) + { + Vector4 c0 = s.V0L; + Vector4 c1 = s.V7L; + Vector4 t0 = (c0 + c1); + Vector4 t7 = (c0 - c1); + + c1 = s.V6L; + c0 = s.V1L; + Vector4 t1 = (c0 + c1); + Vector4 t6 = (c0 - c1); + + c1 = s.V5L; + c0 = s.V2L; + Vector4 t2 = (c0 + c1); + Vector4 t5 = (c0 - c1); + + c0 = s.V3L; + c1 = s.V4L; + Vector4 t3 = (c0 + c1); + Vector4 t4 = (c0 - c1); + + /* + c1 = x[0]; c2 = x[7]; t0 = c1 + c2; t7 = c1 - c2; + c1 = x[1]; c2 = x[6]; t1 = c1 + c2; t6 = c1 - c2; + c1 = x[2]; c2 = x[5]; t2 = c1 + c2; t5 = c1 - c2; + c1 = x[3]; c2 = x[4]; t3 = c1 + c2; t4 = c1 - c2; + */ + + c0 = (t0 + t3); + Vector4 c3 = (t0 - t3); + c1 = (t1 + t2); + Vector4 c2 = (t1 - t2); + + /* + c0 = t0 + t3; c3 = t0 - t3; + c1 = t1 + t2; c2 = t1 - t2; + */ + + d.V0L = c0 + c1; + d.V4L = c0 - c1; + + /*y[0] = c0 + c1; + y[4] = c0 - c1;*/ + + Vector4 w0 = new Vector4(0.541196f); + Vector4 w1 = new Vector4(1.306563f); + + + d.V2L = (w0 * c2) + (w1 * c3); + + d.V6L = (w0 * c3) - (w1 * c2); + /* + y[2] = c2 * r[6] + c3 * r[2]; + y[6] = c3 * r[6] - c2 * r[2]; + */ + + w0 = new Vector4(1.175876f); + w1 = new Vector4(0.785695f); + c3 = ((w0 * t4) + (w1 * t7)); + c0 = ((w0 * t7) - (w1 * t4)); + /* + c3 = t4 * r[3] + t7 * r[5]; + c0 = t7 * r[3] - t4 * r[5]; + */ + + w0 = new Vector4(1.387040f); + w1 = new Vector4(0.275899f); + c2 = ((w0 * t5) + (w1 * t6)); + c1 = ((w0 * t6) - (w1 * t5)); + /* + c2 = t5 * r[1] + t6 * r[7]; + c1 = t6 * r[1] - t5 * r[7]; + */ + + d.V3L = (c0 - c2); + + d.V5L = (c3 - c1); + //y[5] = c3 - c1; y[3] = c0 - c2; + + Vector4 invsqrt2 = new Vector4(0.707107f); + c0 = ((c0 + c2) * invsqrt2); + c3 = ((c3 + c1) * invsqrt2); + //c0 = (c0 + c2) * invsqrt2; + //c3 = (c3 + c1) * invsqrt2; + + d.V1L = (c0 + c3); + + d.V7L = (c0 - c3); + //y[1] = c0 + c3; y[7] = c0 - c3; + + /*for(i = 0;i < 8;i++) + { + y[i] *= invsqrt2h; + }*/ + } + + /// + /// Original: + /// + /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L15 + /// + /// + /// Source + /// Destination + public static void FDCT8x4_RightPart(ref Block8x8F s, ref Block8x8F d) + { + Vector4 c0 = s.V0R; + Vector4 c1 = s.V7R; + Vector4 t0 = (c0 + c1); + Vector4 t7 = (c0 - c1); + + c1 = s.V6R; + c0 = s.V1R; + Vector4 t1 = (c0 + c1); + Vector4 t6 = (c0 - c1); + + c1 = s.V5R; + c0 = s.V2R; + Vector4 t2 = (c0 + c1); + Vector4 t5 = (c0 - c1); + + c0 = s.V3R; + c1 = s.V4R; + Vector4 t3 = (c0 + c1); + Vector4 t4 = (c0 - c1); + + /* + c1 = x[0]; c2 = x[7]; t0 = c1 + c2; t7 = c1 - c2; + c1 = x[1]; c2 = x[6]; t1 = c1 + c2; t6 = c1 - c2; + c1 = x[2]; c2 = x[5]; t2 = c1 + c2; t5 = c1 - c2; + c1 = x[3]; c2 = x[4]; t3 = c1 + c2; t4 = c1 - c2; + */ + + c0 = (t0 + t3); + Vector4 c3 = (t0 - t3); + c1 = (t1 + t2); + Vector4 c2 = (t1 - t2); + + /* + c0 = t0 + t3; c3 = t0 - t3; + c1 = t1 + t2; c2 = t1 - t2; + */ + + d.V0R = c0 + c1; + d.V4R = c0 - c1; + + /*y[0] = c0 + c1; + y[4] = c0 - c1;*/ + + Vector4 w0 = new Vector4(0.541196f); + Vector4 w1 = new Vector4(1.306563f); + + + d.V2R = (w0 * c2) + (w1 * c3); + + d.V6R = (w0 * c3) - (w1 * c2); + /* + y[2] = c2 * r[6] + c3 * r[2]; + y[6] = c3 * r[6] - c2 * r[2]; + */ + + w0 = new Vector4(1.175876f); + w1 = new Vector4(0.785695f); + c3 = ((w0 * t4) + (w1 * t7)); + c0 = ((w0 * t7) - (w1 * t4)); + /* + c3 = t4 * r[3] + t7 * r[5]; + c0 = t7 * r[3] - t4 * r[5]; + */ + + w0 = new Vector4(1.387040f); + w1 = new Vector4(0.275899f); + c2 = ((w0 * t5) + (w1 * t6)); + c1 = ((w0 * t6) - (w1 * t5)); + /* + c2 = t5 * r[1] + t6 * r[7]; + c1 = t6 * r[1] - t5 * r[7]; + */ + + d.V3R = (c0 - c2); + + d.V5R = (c3 - c1); + //y[5] = c3 - c1; y[3] = c0 - c2; + + c0 = ((c0 + c2) * InvSqrt2); + c3 = ((c3 + c1) * InvSqrt2); + //c0 = (c0 + c2) * invsqrt2; + //c3 = (c3 + c1) * invsqrt2; + + d.V1R = (c0 + c3); + + d.V7R = (c0 - c3); + //y[1] = c0 + c3; y[7] = c0 - c3; + + /*for(i = 0;i < 8;i++) + { + y[i] *= invsqrt2h; + }*/ + } + + public static void TransformFDCT(ref Block8x8F s, ref Block8x8F d, ref Block8x8F temp, bool offsetSourceByNeg128 = true) + { + s.TransposeInto(ref temp); + if (offsetSourceByNeg128) + { + temp.AddToAllInplace(new Vector4(-128)); + } + + FDCT8x4_LeftPart(ref temp, ref d); + FDCT8x4_RightPart(ref temp, ref d); + + d.TransposeInto(ref temp); + + FDCT8x4_LeftPart(ref temp, ref d); + FDCT8x4_RightPart(ref temp, ref d); + + d.MultiplyAllInplace(C_0_125); + } + } +} \ No newline at end of file diff --git a/src/ImageSharp/Formats/Jpg/Components/MutableSpanExtensions.cs b/src/ImageSharp/Formats/Jpg/Components/MutableSpanExtensions.cs index 42edcd3c4a..7253a816e1 100644 --- a/src/ImageSharp/Formats/Jpg/Components/MutableSpanExtensions.cs +++ b/src/ImageSharp/Formats/Jpg/Components/MutableSpanExtensions.cs @@ -3,7 +3,7 @@ // Licensed under the Apache License, Version 2.0. // -namespace ImageSharp.Formats.Jpg.Components +namespace ImageSharp.Formats { using System.Numerics; using System.Runtime.CompilerServices; @@ -77,5 +77,58 @@ namespace ImageSharp.Formats.Jpg.Components data[2] = (int)v.Z; data[3] = (int)v.W; } + + + + public static MutableSpan ConvertToFloat32MutableSpan(this MutableSpan src) + { + MutableSpan result = new MutableSpan(src.TotalCount); + for (int i = 0; i < src.TotalCount; i++) + { + result[i] = (float)src[i]; + } + return result; + } + + public static MutableSpan ConvertToInt32MutableSpan(this MutableSpan src) + { + MutableSpan result = new MutableSpan(src.TotalCount); + for (int i = 0; i < src.TotalCount; i++) + { + result[i] = (int)src[i]; + } + return result; + } + + public static MutableSpan AddScalarToAllValues(this MutableSpan src, float scalar) + { + MutableSpan result = new MutableSpan(src.TotalCount); + for (int i = 0; i < src.TotalCount; i++) + { + result[i] = src[i] + scalar; + } + return result; + } + + public static MutableSpan AddScalarToAllValues(this MutableSpan src, int scalar) + { + MutableSpan result = new MutableSpan(src.TotalCount); + for (int i = 0; i < src.TotalCount; i++) + { + result[i] = src[i] + scalar; + } + return result; + } + + + public static MutableSpan Copy(this MutableSpan src) + { + MutableSpan result = new MutableSpan(src.TotalCount); + for (int i = 0; i < src.TotalCount; i++) + { + result[i] = src[i]; + } + return result; + } } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index f2ff236344..8939d2c207 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -1,20 +1,18 @@ // Uncomment this to turn unit tests into benchmarks: //#define BENCHMARKING -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Text; -using ImageSharp.Formats; -using Xunit; -using Xunit.Abstractions; // ReSharper disable InconsistentNaming namespace ImageSharp.Tests.Formats.Jpg { + using System.Diagnostics; + using System.Numerics; + + using ImageSharp.Formats; + + using Xunit; + using Xunit.Abstractions; + public class Block8x8FTests : UtilityTestClassBase { #if BENCHMARKING @@ -23,7 +21,8 @@ namespace ImageSharp.Tests.Formats.Jpg public const int Times = 1; #endif - public Block8x8FTests(ITestOutputHelper output) : base(output) + public Block8x8FTests(ITestOutputHelper output) + : base(output) { } @@ -31,20 +30,23 @@ namespace ImageSharp.Tests.Formats.Jpg public void Indexer() { float sum = 0; - this.Measure(Times, () => - { - Block8x8F block = new Block8x8F(); - - for (int i = 0; i < Block8x8F.ScalarCount; i++) - { - block[i] = i; - } - sum = 0; - for (int i = 0; i < Block8x8F.ScalarCount; i++) - { - sum += block[i]; - } - }); + this.Measure( + Times, + () => + { + Block8x8F block = new Block8x8F(); + + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + block[i] = i; + } + + sum = 0; + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + sum += block[i]; + } + }); Assert.Equal(sum, 64f * 63f * 0.5f); } @@ -52,21 +54,24 @@ namespace ImageSharp.Tests.Formats.Jpg public unsafe void Indexer_GetScalarAt_SetScalarAt() { float sum = 0; - this.Measure(Times, () => - { - Block8x8F block = new Block8x8F(); - - for (int i = 0; i < Block8x8F.ScalarCount; i++) - { - Block8x8F.SetScalarAt(&block, i, i); - } - sum = 0; - for (int i = 0; i < Block8x8F.ScalarCount; i++) - { - sum += Block8x8F.GetScalarAt(&block, i); - } - }); - Assert.Equal(sum, 64f*63f*0.5f); + this.Measure( + Times, + () => + { + Block8x8F block = new Block8x8F(); + + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + Block8x8F.SetScalarAt(&block, i, i); + } + + sum = 0; + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + sum += Block8x8F.GetScalarAt(&block, i); + } + }); + Assert.Equal(sum, 64f * 63f * 0.5f); } [Fact] @@ -74,21 +79,24 @@ namespace ImageSharp.Tests.Formats.Jpg { float sum = 0; - this.Measure(Times, () => - { - //Block8x8F block = new Block8x8F(); - float[] block = new float[64]; - for (int i = 0; i < Block8x8F.ScalarCount; i++) - { - block[i] = i; - } - sum = 0; - for (int i = 0; i < Block8x8F.ScalarCount; i++) - { - sum += block[i]; - } - }); - Assert.Equal(sum, 64f*63f*0.5f); + this.Measure( + Times, + () => + { + // Block8x8F block = new Block8x8F(); + float[] block = new float[64]; + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + block[i] = i; + } + + sum = 0; + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + sum += block[i]; + } + }); + Assert.Equal(sum, 64f * 63f * 0.5f); } [Fact] @@ -101,15 +109,19 @@ namespace ImageSharp.Tests.Formats.Jpg { data[i] = i; } - Measure(Times, () => - { - Block8x8F b = new Block8x8F(); - b.LoadFrom(data); - b.CopyTo(mirror); - }); + + this.Measure( + Times, + () => + { + Block8x8F b = new Block8x8F(); + b.LoadFrom(data); + b.CopyTo(mirror); + }); Assert.Equal(data, mirror); - //PrintLinearData((MutableSpan)mirror); + + // PrintLinearData((MutableSpan)mirror); } [Fact] @@ -122,15 +134,19 @@ namespace ImageSharp.Tests.Formats.Jpg { data[i] = i; } - Measure(Times, () => - { - Block8x8F b = new Block8x8F(); - Block8x8F.LoadFrom(&b, data); - Block8x8F.CopyTo(&b, mirror); - }); + + this.Measure( + Times, + () => + { + Block8x8F b = new Block8x8F(); + Block8x8F.LoadFrom(&b, data); + Block8x8F.CopyTo(&b, mirror); + }); Assert.Equal(data, mirror); - //PrintLinearData((MutableSpan)mirror); + + // PrintLinearData((MutableSpan)mirror); } [Fact] @@ -143,19 +159,21 @@ namespace ImageSharp.Tests.Formats.Jpg { data[i] = i; } - Measure(Times, () => - { - Block8x8F v = new Block8x8F(); - v.LoadFrom(data); - v.CopyTo(mirror); - }); - + this.Measure( + Times, + () => + { + Block8x8F v = new Block8x8F(); + v.LoadFrom(data); + v.CopyTo(mirror); + }); + Assert.Equal(data, mirror); - //PrintLinearData((MutableSpan)mirror); + + // PrintLinearData((MutableSpan)mirror); } - - + [Fact] public void TransposeInto() { @@ -173,8 +191,6 @@ namespace ImageSharp.Tests.Formats.Jpg Assert.Equal(expected, actual); } - - private class BufferHolder { @@ -188,7 +204,7 @@ namespace ImageSharp.Tests.Formats.Jpg source.Buffer.LoadFrom(Create8x8FloatData()); BufferHolder dest = new BufferHolder(); - Output.WriteLine($"TranposeInto_PinningImpl_Benchmark X {Times} ..."); + this.Output.WriteLine($"TranposeInto_PinningImpl_Benchmark X {Times} ..."); Stopwatch sw = Stopwatch.StartNew(); for (int i = 0; i < Times; i++) @@ -197,10 +213,9 @@ namespace ImageSharp.Tests.Formats.Jpg } sw.Stop(); - Output.WriteLine($"TranposeInto_PinningImpl_Benchmark finished in {sw.ElapsedMilliseconds} ms"); - + this.Output.WriteLine($"TranposeInto_PinningImpl_Benchmark finished in {sw.ElapsedMilliseconds} ms"); } - + [Fact] public void iDCT2D8x4_LeftPart() { @@ -214,14 +229,14 @@ namespace ImageSharp.Tests.Formats.Jpg Block8x8F dest = new Block8x8F(); - source.IDCT8x4_LeftPart(ref dest); + DCT.IDCT8x4_LeftPart(ref source, ref dest); float[] actualDestArray = new float[64]; dest.CopyTo(actualDestArray); - Print8x8Data(expectedDestArray); - Output.WriteLine("**************"); - Print8x8Data(actualDestArray); + this.Print8x8Data(expectedDestArray); + this.Output.WriteLine("**************"); + this.Print8x8Data(actualDestArray); Assert.Equal(expectedDestArray, actualDestArray); } @@ -239,65 +254,49 @@ namespace ImageSharp.Tests.Formats.Jpg Block8x8F dest = new Block8x8F(); - source.IDCT8x4_RightPart(ref dest); + DCT.IDCT8x4_RightPart(ref source, ref dest); float[] actualDestArray = new float[64]; dest.CopyTo(actualDestArray); - Print8x8Data(expectedDestArray); - Output.WriteLine("**************"); - Print8x8Data(actualDestArray); + this.Print8x8Data(expectedDestArray); + this.Output.WriteLine("**************"); + this.Print8x8Data(actualDestArray); Assert.Equal(expectedDestArray.Data, actualDestArray); } - private struct ApproximateFloatComparer : IEqualityComparer - { - private const float Eps = 0.0001f; - - public bool Equals(float x, float y) - { - float d = x - y; - - return d > -Eps && d < Eps; - } - - public int GetHashCode(float obj) - { - throw new InvalidOperationException(); - } - } - - [Fact] - public void IDCTInto() + [Theory] + [InlineData(1)] + [InlineData(2)] + [InlineData(3)] + public void TransformIDCT(int seed) { - float[] sourceArray = Create8x8FloatData(); + var sourceArray = Create8x8RandomFloatData(-200, 200, seed); float[] expectedDestArray = new float[64]; float[] tempArray = new float[64]; ReferenceImplementations.iDCT2D_llm(sourceArray, expectedDestArray, tempArray); - //ReferenceImplementations.iDCT8x8_llm_sse(sourceArray, expectedDestArray, tempArray); - + // ReferenceImplementations.iDCT8x8_llm_sse(sourceArray, expectedDestArray, tempArray); Block8x8F source = new Block8x8F(); source.LoadFrom(sourceArray); Block8x8F dest = new Block8x8F(); Block8x8F tempBuffer = new Block8x8F(); - source.TransformIDCTInto(ref dest, ref tempBuffer); + DCT.TransformIDCT(ref source, ref dest, ref tempBuffer); float[] actualDestArray = new float[64]; dest.CopyTo(actualDestArray); - Print8x8Data(expectedDestArray); - Output.WriteLine("**************"); - Print8x8Data(actualDestArray); - Assert.Equal(expectedDestArray, actualDestArray, new ApproximateFloatComparer()); - Assert.Equal(expectedDestArray, actualDestArray, new ApproximateFloatComparer()); + this.Print8x8Data(expectedDestArray); + this.Output.WriteLine("**************"); + this.Print8x8Data(actualDestArray); + Assert.Equal(expectedDestArray, actualDestArray, new ApproximateFloatComparer(1f)); + Assert.Equal(expectedDestArray, actualDestArray, new ApproximateFloatComparer(1f)); } - [Fact] public unsafe void CopyColorsTo() { @@ -308,24 +307,23 @@ namespace ImageSharp.Tests.Formats.Jpg int stride = 256; int height = 42; - int offset = height*10 + 20; + int offset = height * 10 + 20; - byte[] colorsExpected = new byte[stride*height]; - byte[] colorsActual = new byte[stride*height]; + byte[] colorsExpected = new byte[stride * height]; + byte[] colorsActual = new byte[stride * height]; Block8x8F temp = new Block8x8F(); - + ReferenceImplementations.CopyColorsTo(ref block, new MutableSpan(colorsExpected, offset), stride); block.CopyColorsTo(new MutableSpan(colorsActual, offset), stride, &temp); - //Output.WriteLine("******* EXPECTED: *********"); - //PrintLinearData(colorsExpected); - //Output.WriteLine("******** ACTUAL: **********"); - + // Output.WriteLine("******* EXPECTED: *********"); + // PrintLinearData(colorsExpected); + // Output.WriteLine("******** ACTUAL: **********"); Assert.Equal(colorsExpected, colorsActual); } - + private static float[] Create8x8ColorCropTestData() { float[] result = new float[64]; @@ -336,6 +334,7 @@ namespace ImageSharp.Tests.Formats.Jpg result[i * 8 + j] = -300 + i * 100 + j * 10; } } + return result; } @@ -345,22 +344,88 @@ namespace ImageSharp.Tests.Formats.Jpg Block8x8F block = new Block8x8F(); var input = Create8x8ColorCropTestData(); block.LoadFrom(input); - Output.WriteLine("Input:"); - PrintLinearData(input); - + this.Output.WriteLine("Input:"); + this.PrintLinearData(input); Block8x8F dest = new Block8x8F(); block.TransformByteConvetibleColorValuesInto(ref dest); float[] array = new float[64]; dest.CopyTo(array); - Output.WriteLine("Result:"); - PrintLinearData(array); + this.Output.WriteLine("Result:"); + this.PrintLinearData(array); foreach (float val in array) { Assert.InRange(val, 0, 255); } } + [Theory] + [InlineData(1)] + [InlineData(2)] + public void FDCT8x4_LeftPart(int seed) + { + var src = Create8x8RandomFloatData(-200, 200, seed); + var srcBlock = new Block8x8F(); + srcBlock.LoadFrom(src); + + var destBlock = new Block8x8F(); + + var expectedDest = new MutableSpan(64); + + ReferenceImplementations.fDCT2D8x4_32f(src, expectedDest); + DCT.FDCT8x4_LeftPart(ref srcBlock, ref destBlock); + + var actualDest = new MutableSpan(64); + destBlock.CopyTo(actualDest); + + Assert.Equal(actualDest.Data, expectedDest.Data, new ApproximateFloatComparer(1f)); + } + + [Theory] + [InlineData(1)] + [InlineData(2)] + public void FDCT8x4_RightPart(int seed) + { + var src = Create8x8RandomFloatData(-200, 200, seed); + var srcBlock = new Block8x8F(); + srcBlock.LoadFrom(src); + + var destBlock = new Block8x8F(); + + var expectedDest = new MutableSpan(64); + + ReferenceImplementations.fDCT2D8x4_32f(src.Slice(4), expectedDest.Slice(4)); + DCT.FDCT8x4_RightPart(ref srcBlock, ref destBlock); + + var actualDest = new MutableSpan(64); + destBlock.CopyTo(actualDest); + + Assert.Equal(actualDest.Data, expectedDest.Data, new ApproximateFloatComparer(1f)); + } + + [Theory] + [InlineData(1)] + [InlineData(2)] + public void TransformFDCT(int seed) + { + var src = Create8x8RandomFloatData(-200, 200, seed); + var srcBlock = new Block8x8F(); + srcBlock.LoadFrom(src); + + var destBlock = new Block8x8F(); + + var expectedDest = new MutableSpan(64); + var temp1 = new MutableSpan(64); + var temp2 = new Block8x8F(); + + ReferenceImplementations.fDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true); + DCT.TransformFDCT(ref srcBlock, ref destBlock, ref temp2, false); + + var actualDest = new MutableSpan(64); + destBlock.CopyTo(actualDest); + + Assert.Equal(actualDest.Data, expectedDest.Data, new ApproximateFloatComparer(1f)); + } } } \ No newline at end of file diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegTests.cs index 11d535fb86..6a9267fec1 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegTests.cs @@ -10,9 +10,6 @@ namespace ImageSharp.Tests { public class JpegTests { - - public const string TestOutputDirectory = "TestOutput/Jpeg"; - private ITestOutputHelper Output { get; } public JpegTests(ITestOutputHelper output) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs index cee236adfa..006e959633 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs @@ -2,6 +2,7 @@ namespace ImageSharp.Tests.Formats.Jpg { + using System; using System.Numerics; using System.Runtime.CompilerServices; @@ -9,7 +10,7 @@ namespace ImageSharp.Tests.Formats.Jpg /// /// This class contains simplified (unefficient) reference implementations to produce verification data for unit tests - /// DCT code Ported from https://github.com/norishigefukushima/dct_simd + /// Floating point DCT code Ported from https://github.com/norishigefukushima/dct_simd /// internal static class ReferenceImplementations { @@ -46,6 +47,310 @@ namespace ImageSharp.Tests.Formats.Jpg } } + public static class IntegerReferenceDCT + { + private const int fix_0_298631336 = 2446; + private const int fix_0_390180644 = 3196; + private const int fix_0_541196100 = 4433; + private const int fix_0_765366865 = 6270; + private const int fix_0_899976223 = 7373; + private const int fix_1_175875602 = 9633; + private const int fix_1_501321110 = 12299; + private const int fix_1_847759065 = 15137; + private const int fix_1_961570560 = 16069; + private const int fix_2_053119869 = 16819; + private const int fix_2_562915447 = 20995; + private const int fix_3_072711026 = 25172; + + /// + /// The number of bits + /// + private const int Bits = 13; + + /// + /// The number of bits to shift by on the first pass. + /// + private const int Pass1Bits = 2; + + /// + /// The value to shift by + /// + private const int CenterJSample = 128; + + /// + /// Performs a forward DCT on an 8x8 block of coefficients, including a level shift. + /// Leave results scaled up by an overall factor of 8. + /// + /// The block of coefficients. + public static void TransformFDCTInplace(MutableSpan block) + { + // Pass 1: process rows. + for (int y = 0; y < 8; y++) + { + int y8 = y * 8; + + int x0 = block[y8]; + int x1 = block[y8 + 1]; + int x2 = block[y8 + 2]; + int x3 = block[y8 + 3]; + int x4 = block[y8 + 4]; + int x5 = block[y8 + 5]; + int x6 = block[y8 + 6]; + int x7 = block[y8 + 7]; + + int tmp0 = x0 + x7; + int tmp1 = x1 + x6; + int tmp2 = x2 + x5; + int tmp3 = x3 + x4; + + int tmp10 = tmp0 + tmp3; + int tmp12 = tmp0 - tmp3; + int tmp11 = tmp1 + tmp2; + int tmp13 = tmp1 - tmp2; + + tmp0 = x0 - x7; + tmp1 = x1 - x6; + tmp2 = x2 - x5; + tmp3 = x3 - x4; + + block[y8] = (tmp10 + tmp11 - (8 * CenterJSample)) << Pass1Bits; + block[y8 + 4] = (tmp10 - tmp11) << Pass1Bits; + int z1 = (tmp12 + tmp13) * fix_0_541196100; + z1 += 1 << (Bits - Pass1Bits - 1); + block[y8 + 2] = (z1 + (tmp12 * fix_0_765366865)) >> (Bits - Pass1Bits); + block[y8 + 6] = (z1 - (tmp13 * fix_1_847759065)) >> (Bits - Pass1Bits); + + tmp10 = tmp0 + tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp0 + tmp2; + tmp13 = tmp1 + tmp3; + z1 = (tmp12 + tmp13) * fix_1_175875602; + z1 += 1 << (Bits - Pass1Bits - 1); + tmp0 = tmp0 * fix_1_501321110; + tmp1 = tmp1 * fix_3_072711026; + tmp2 = tmp2 * fix_2_053119869; + tmp3 = tmp3 * fix_0_298631336; + tmp10 = tmp10 * -fix_0_899976223; + tmp11 = tmp11 * -fix_2_562915447; + tmp12 = tmp12 * -fix_0_390180644; + tmp13 = tmp13 * -fix_1_961570560; + + tmp12 += z1; + tmp13 += z1; + block[y8 + 1] = (tmp0 + tmp10 + tmp12) >> (Bits - Pass1Bits); + block[y8 + 3] = (tmp1 + tmp11 + tmp13) >> (Bits - Pass1Bits); + block[y8 + 5] = (tmp2 + tmp11 + tmp12) >> (Bits - Pass1Bits); + block[y8 + 7] = (tmp3 + tmp10 + tmp13) >> (Bits - Pass1Bits); + } + + // Pass 2: process columns. + // We remove pass1Bits scaling, but leave results scaled up by an overall factor of 8. + for (int x = 0; x < 8; x++) + { + int tmp0 = block[x] + block[56 + x]; + int tmp1 = block[8 + x] + block[48 + x]; + int tmp2 = block[16 + x] + block[40 + x]; + int tmp3 = block[24 + x] + block[32 + x]; + + int tmp10 = tmp0 + tmp3 + (1 << (Pass1Bits - 1)); + int tmp12 = tmp0 - tmp3; + int tmp11 = tmp1 + tmp2; + int tmp13 = tmp1 - tmp2; + + tmp0 = block[x] - block[56 + x]; + tmp1 = block[8 + x] - block[48 + x]; + tmp2 = block[16 + x] - block[40 + x]; + tmp3 = block[24 + x] - block[32 + x]; + + block[x] = (tmp10 + tmp11) >> Pass1Bits; + block[32 + x] = (tmp10 - tmp11) >> Pass1Bits; + + int z1 = (tmp12 + tmp13) * fix_0_541196100; + z1 += 1 << (Bits + Pass1Bits - 1); + block[16 + x] = (z1 + (tmp12 * fix_0_765366865)) >> (Bits + Pass1Bits); + block[48 + x] = (z1 - (tmp13 * fix_1_847759065)) >> (Bits + Pass1Bits); + + tmp10 = tmp0 + tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp0 + tmp2; + tmp13 = tmp1 + tmp3; + z1 = (tmp12 + tmp13) * fix_1_175875602; + z1 += 1 << (Bits + Pass1Bits - 1); + tmp0 = tmp0 * fix_1_501321110; + tmp1 = tmp1 * fix_3_072711026; + tmp2 = tmp2 * fix_2_053119869; + tmp3 = tmp3 * fix_0_298631336; + tmp10 = tmp10 * -fix_0_899976223; + tmp11 = tmp11 * -fix_2_562915447; + tmp12 = tmp12 * -fix_0_390180644; + tmp13 = tmp13 * -fix_1_961570560; + + tmp12 += z1; + tmp13 += z1; + block[8 + x] = (tmp0 + tmp10 + tmp12) >> (Bits + Pass1Bits); + block[24 + x] = (tmp1 + tmp11 + tmp13) >> (Bits + Pass1Bits); + block[40 + x] = (tmp2 + tmp11 + tmp12) >> (Bits + Pass1Bits); + block[56 + x] = (tmp3 + tmp10 + tmp13) >> (Bits + Pass1Bits); + } + + } + private const int w1 = 2841; // 2048*sqrt(2)*cos(1*pi/16) + private const int w2 = 2676; // 2048*sqrt(2)*cos(2*pi/16) + private const int w3 = 2408; // 2048*sqrt(2)*cos(3*pi/16) + private const int w5 = 1609; // 2048*sqrt(2)*cos(5*pi/16) + private const int w6 = 1108; // 2048*sqrt(2)*cos(6*pi/16) + private const int w7 = 565; // 2048*sqrt(2)*cos(7*pi/16) + + private const int w1pw7 = w1 + w7; + private const int w1mw7 = w1 - w7; + private const int w2pw6 = w2 + w6; + private const int w2mw6 = w2 - w6; + private const int w3pw5 = w3 + w5; + private const int w3mw5 = w3 - w5; + + private const int r2 = 181; // 256/sqrt(2) + + /// + /// Performs a 2-D Inverse Discrete Cosine Transformation. + /// + /// The input coefficients should already have been multiplied by the + /// appropriate quantization table. We use fixed-point computation, with the + /// number of bits for the fractional component varying over the intermediate + /// stages. + /// + /// For more on the actual algorithm, see Z. Wang, "Fast algorithms for the + /// discrete W transform and for the discrete Fourier transform", IEEE Trans. on + /// ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984. + /// + /// The source block of coefficients + public static void TransformIDCTInplace(MutableSpan src) + { + // Horizontal 1-D IDCT. + for (int y = 0; y < 8; y++) + { + int y8 = y * 8; + + // If all the AC components are zero, then the IDCT is trivial. + if (src[y8 + 1] == 0 && src[y8 + 2] == 0 && src[y8 + 3] == 0 && + src[y8 + 4] == 0 && src[y8 + 5] == 0 && src[y8 + 6] == 0 && src[y8 + 7] == 0) + { + int dc = src[y8 + 0] << 3; + src[y8 + 0] = dc; + src[y8 + 1] = dc; + src[y8 + 2] = dc; + src[y8 + 3] = dc; + src[y8 + 4] = dc; + src[y8 + 5] = dc; + src[y8 + 6] = dc; + src[y8 + 7] = dc; + continue; + } + + // Prescale. + int x0 = (src[y8 + 0] << 11) + 128; + int x1 = src[y8 + 4] << 11; + int x2 = src[y8 + 6]; + int x3 = src[y8 + 2]; + int x4 = src[y8 + 1]; + int x5 = src[y8 + 7]; + int x6 = src[y8 + 5]; + int x7 = src[y8 + 3]; + + // Stage 1. + int x8 = w7 * (x4 + x5); + x4 = x8 + (w1mw7 * x4); + x5 = x8 - (w1pw7 * x5); + x8 = w3 * (x6 + x7); + x6 = x8 - (w3mw5 * x6); + x7 = x8 - (w3pw5 * x7); + + // Stage 2. + x8 = x0 + x1; + x0 -= x1; + x1 = w6 * (x3 + x2); + x2 = x1 - (w2pw6 * x2); + x3 = x1 + (w2mw6 * x3); + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + // Stage 3. + x7 = x8 + x3; + x8 -= x3; + x3 = x0 + x2; + x0 -= x2; + x2 = ((r2 * (x4 + x5)) + 128) >> 8; + x4 = ((r2 * (x4 - x5)) + 128) >> 8; + + // Stage 4. + src[y8 + 0] = (x7 + x1) >> 8; + src[y8 + 1] = (x3 + x2) >> 8; + src[y8 + 2] = (x0 + x4) >> 8; + src[y8 + 3] = (x8 + x6) >> 8; + src[y8 + 4] = (x8 - x6) >> 8; + src[y8 + 5] = (x0 - x4) >> 8; + src[y8 + 6] = (x3 - x2) >> 8; + src[y8 + 7] = (x7 - x1) >> 8; + } + + // Vertical 1-D IDCT. + for (int x = 0; x < 8; x++) + { + // Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial. + // However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so + // we do not bother to check for the all-zero case. + + // Prescale. + int y0 = (src[x] << 8) + 8192; + int y1 = src[32 + x] << 8; + int y2 = src[48 + x]; + int y3 = src[16 + x]; + int y4 = src[8 + x]; + int y5 = src[56 + x]; + int y6 = src[40 + x]; + int y7 = src[24 + x]; + + // Stage 1. + int y8 = (w7 * (y4 + y5)) + 4; + y4 = (y8 + (w1mw7 * y4)) >> 3; + y5 = (y8 - (w1pw7 * y5)) >> 3; + y8 = (w3 * (y6 + y7)) + 4; + y6 = (y8 - (w3mw5 * y6)) >> 3; + y7 = (y8 - (w3pw5 * y7)) >> 3; + + // Stage 2. + y8 = y0 + y1; + y0 -= y1; + y1 = (w6 * (y3 + y2)) + 4; + y2 = (y1 - (w2pw6 * y2)) >> 3; + y3 = (y1 + (w2mw6 * y3)) >> 3; + y1 = y4 + y6; + y4 -= y6; + y6 = y5 + y7; + y5 -= y7; + + // Stage 3. + y7 = y8 + y3; + y8 -= y3; + y3 = y0 + y2; + y0 -= y2; + y2 = ((r2 * (y4 + y5)) + 128) >> 8; + y4 = ((r2 * (y4 - y5)) + 128) >> 8; + + // Stage 4. + src[x] = (y7 + y1) >> 14; + src[8 + x] = (y3 + y2) >> 14; + src[16 + x] = (y0 + y4) >> 14; + src[24 + x] = (y8 + y6) >> 14; + src[32 + x] = (y8 - y6) >> 14; + src[40 + x] = (y0 - y4) >> 14; + src[48 + x] = (y3 - y2) >> 14; + src[56 + x] = (y7 - y1) >> 14; + } + } + } + /// /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L200 /// @@ -56,9 +361,11 @@ namespace ImageSharp.Tests.Formats.Jpg float a0, a1, a2, a3, b0, b1, b2, b3; float z0, z1, z2, z3, z4; + float r0 = 1.414214f; float r1 = 1.387040f; float r2 = 1.306563f; float r3 = 1.175876f; + float r4 = 1.000000f; float r5 = 0.785695f; float r6 = 0.541196f; float r7 = 0.275899f; @@ -130,6 +437,145 @@ namespace ImageSharp.Tests.Formats.Jpg } } + /// + /// Original: + /// + /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L15 + /// + /// + /// Source + /// Destination + public static void fDCT2D8x4_32f(MutableSpan s, MutableSpan d) + { + Vector4 c0 = _mm_load_ps(s, 0); + Vector4 c1 = _mm_load_ps(s, 56); + Vector4 t0 = (c0 + c1); + Vector4 t7 = (c0 - c1); + + c1 = _mm_load_ps(s, 48); + c0 = _mm_load_ps(s, 8); + Vector4 t1 = (c0 + c1); + Vector4 t6 = (c0 - c1); + + c1 = _mm_load_ps(s, 40); + c0 = _mm_load_ps(s, 16); + Vector4 t2 = (c0 + c1); + Vector4 t5 = (c0 - c1); + + c0 = _mm_load_ps(s, 24); + c1 = _mm_load_ps(s, 32); + Vector4 t3 = (c0 + c1); + Vector4 t4 = (c0 - c1); + + /* + c1 = x[0]; c2 = x[7]; t0 = c1 + c2; t7 = c1 - c2; + c1 = x[1]; c2 = x[6]; t1 = c1 + c2; t6 = c1 - c2; + c1 = x[2]; c2 = x[5]; t2 = c1 + c2; t5 = c1 - c2; + c1 = x[3]; c2 = x[4]; t3 = c1 + c2; t4 = c1 - c2; + */ + + c0 = (t0 + t3); + Vector4 c3 = (t0 - t3); + c1 = (t1 + t2); + Vector4 c2 = (t1 - t2); + + /* + c0 = t0 + t3; c3 = t0 - t3; + c1 = t1 + t2; c2 = t1 - t2; + */ + + _mm_store_ps(d, 0, (c0 + c1)); + + _mm_store_ps(d, 32, (c0 - c1)); + + /*y[0] = c0 + c1; + y[4] = c0 - c1;*/ + + Vector4 w0 = new Vector4(0.541196f); + Vector4 w1 = new Vector4(1.306563f); + + _mm_store_ps(d, 16, ((w0 * c2) + (w1 * c3))); + + _mm_store_ps(d, 48, ((w0 * c3) - (w1 * c2))); + /* + y[2] = c2 * r[6] + c3 * r[2]; + y[6] = c3 * r[6] - c2 * r[2]; + */ + + w0 = new Vector4(1.175876f); + w1 = new Vector4(0.785695f); + c3 = ((w0 * t4) + (w1 * t7)); + c0 = ((w0 * t7) - (w1 * t4)); + /* + c3 = t4 * r[3] + t7 * r[5]; + c0 = t7 * r[3] - t4 * r[5]; + */ + + w0 = new Vector4(1.387040f); + w1 = new Vector4(0.275899f); + c2 = ((w0 * t5) + (w1 * t6)); + c1 = ((w0 * t6) - (w1 * t5)); + /* + c2 = t5 * r[1] + t6 * r[7]; + c1 = t6 * r[1] - t5 * r[7]; + */ + + _mm_store_ps(d, 24, (c0 - c2)); + + _mm_store_ps(d, 40, (c3 - c1)); + //y[5] = c3 - c1; y[3] = c0 - c2; + + Vector4 invsqrt2 = new Vector4(0.707107f); + c0 = ((c0 + c2) * invsqrt2); + c3 = ((c3 + c1) * invsqrt2); + //c0 = (c0 + c2) * invsqrt2; + //c3 = (c3 + c1) * invsqrt2; + + _mm_store_ps(d, 8, (c0 + c3)); + + _mm_store_ps(d, 56, (c0 - c3)); + //y[1] = c0 + c3; y[7] = c0 - c3; + + /*for(i = 0;i < 8;i++) + { + y[i] *= invsqrt2h; + }*/ + } + + public static void fDCT8x8_llm_sse(MutableSpan s, MutableSpan d, MutableSpan temp) + { + Transpose8x8(s, temp); + + fDCT2D8x4_32f(temp, d); + + fDCT2D8x4_32f(temp.Slice(4), d.Slice(4)); + + Transpose8x8(d, temp); + + fDCT2D8x4_32f(temp, d); + + fDCT2D8x4_32f(temp.Slice(4), d.Slice(4)); + + Vector4 c = new Vector4(0.1250f); + + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//0 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//1 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//2 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//3 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//4 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//5 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//6 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//7 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//8 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//9 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//10 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//11 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//12 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//13 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//14 + _mm_store_ps(d, 0, (_mm_load_ps(d, 0) * c)); d.AddOffset(4);//15 + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector4 _mm_load_ps(MutableSpan src, int offset) { @@ -329,5 +775,96 @@ namespace ImageSharp.Tests.Formats.Jpg } } } + + internal static void fDCT1Dllm_32f(MutableSpan x, MutableSpan y) + { + float t0, t1, t2, t3, t4, t5, t6, t7; + float c0, c1, c2, c3; + float[] r = new float[8]; + + //for(i = 0;i < 8;i++){ r[i] = (float)(cos((double)i / 16.0 * M_PI) * M_SQRT2); } + r[0] = 1.414214f; + r[1] = 1.387040f; + r[2] = 1.306563f; + r[3] = 1.175876f; + r[4] = 1.000000f; + r[5] = 0.785695f; + r[6] = 0.541196f; + r[7] = 0.275899f; + + const float invsqrt2 = 0.707107f; //(float)(1.0f / M_SQRT2); + const float invsqrt2h = 0.353554f; //invsqrt2*0.5f; + + c1 = x[0]; + c2 = x[7]; + t0 = c1 + c2; + t7 = c1 - c2; + c1 = x[1]; + c2 = x[6]; + t1 = c1 + c2; + t6 = c1 - c2; + c1 = x[2]; + c2 = x[5]; + t2 = c1 + c2; + t5 = c1 - c2; + c1 = x[3]; + c2 = x[4]; + t3 = c1 + c2; + t4 = c1 - c2; + + c0 = t0 + t3; + c3 = t0 - t3; + c1 = t1 + t2; + c2 = t1 - t2; + + y[0] = c0 + c1; + y[4] = c0 - c1; + y[2] = c2 * r[6] + c3 * r[2]; + y[6] = c3 * r[6] - c2 * r[2]; + + c3 = t4 * r[3] + t7 * r[5]; + c0 = t7 * r[3] - t4 * r[5]; + c2 = t5 * r[1] + t6 * r[7]; + c1 = t6 * r[1] - t5 * r[7]; + + y[5] = c3 - c1; + y[3] = c0 - c2; + c0 = (c0 + c2) * invsqrt2; + c3 = (c3 + c1) * invsqrt2; + y[1] = c0 + c3; + y[7] = c0 - c3; + } + + internal static void fDCT2D_llm( + MutableSpan s, + MutableSpan d, + MutableSpan temp, + bool downscaleBy8 = false, + bool offsetSourceByNeg128 = false) + { + MutableSpan sWorker = offsetSourceByNeg128 ? s.AddScalarToAllValues(-128f) : s; + + for (int j = 0; j < 8; j++) + { + fDCT1Dllm_32f(sWorker.Slice(j * 8), temp.Slice(j * 8)); + } + + Transpose8x8(temp, d); + + for (int j = 0; j < 8; j++) + { + fDCT1Dllm_32f(d.Slice(j * 8), temp.Slice(j * 8)); + } + + Transpose8x8(temp, d); + + if (downscaleBy8) + { + for (int j = 0; j < 64; j++) + { + d[j] *= 0.125f; + } + } + } } } \ No newline at end of file diff --git a/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementationsTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementationsTests.cs new file mode 100644 index 0000000000..265d6b5f23 --- /dev/null +++ b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementationsTests.cs @@ -0,0 +1,144 @@ +// ReSharper disable InconsistentNaming +namespace ImageSharp.Tests.Formats.Jpg +{ + using System.Numerics; + using ImageSharp.Formats; + using Xunit; + using Xunit.Abstractions; + + public class ReferenceImplementationsTests : UtilityTestClassBase + { + public ReferenceImplementationsTests(ITestOutputHelper output) + : base(output) + { + } + + + [Theory] + [InlineData(42)] + [InlineData(1)] + [InlineData(2)] + public void Idct_FloatingPointReferenceImplementation_IsEquivalentToIntegerImplementation(int seed) + { + MutableSpan intData = Create8x8RandomIntData(-200, 200, seed); + MutableSpan floatSrc = intData.ConvertToFloat32MutableSpan(); + + ReferenceImplementations.IntegerReferenceDCT.TransformIDCTInplace(intData); + + MutableSpan dest = new MutableSpan(64); + MutableSpan temp = new MutableSpan(64); + + ReferenceImplementations.iDCT2D_llm(floatSrc, dest, temp); + + for (int i = 0; i < 64; i++) + { + float expected = intData[i]; + float actual = dest[i]; + + Assert.Equal(expected, actual, new ApproximateFloatComparer(1f)); + } + } + + [Theory] + [InlineData(42, 0)] + [InlineData(1, 0)] + [InlineData(2, 0)] + public void IntegerDCT_ForwardThenInverse(int seed, int startAt) + { + MutableSpan original = Create8x8RandomIntData(-200, 200, seed); + + var block = original.AddScalarToAllValues(128); + + ReferenceImplementations.IntegerReferenceDCT.TransformFDCTInplace(block); + + for (int i = 0; i < 64; i++) + { + block[i] /= 8; + } + + ReferenceImplementations.IntegerReferenceDCT.TransformIDCTInplace(block); + + for (int i = startAt; i < 64; i++) + { + float expected = original[i]; + float actual = (float)block[i]; + + Assert.Equal(expected, actual, new ApproximateFloatComparer(3f)); + } + + } + + [Theory] + [InlineData(42, 0)] + [InlineData(1, 0)] + [InlineData(2, 0)] + public void FloatingPointDCT_ReferenceImplementation_ForwardThenInverse(int seed, int startAt) + { + var data = Create8x8RandomIntData(-200, 200, seed); + MutableSpan src = new MutableSpan(data).ConvertToFloat32MutableSpan(); + MutableSpan dest = new MutableSpan(64); + MutableSpan temp = new MutableSpan(64); + + ReferenceImplementations.fDCT2D_llm(src, dest, temp, true); + ReferenceImplementations.iDCT2D_llm(dest, src, temp); + + for (int i = startAt; i < 64; i++) + { + float expected = data[i]; + float actual = (float)src[i]; + + Assert.Equal(expected, actual, new ApproximateFloatComparer(2f)); + } + } + + [Fact] + public void HowMuchIsTheFish() + { + Output.WriteLine(Vector.Count.ToString()); + } + + [Theory] + [InlineData(42)] + [InlineData(1)] + [InlineData(2)] + public void Fdct_FloatingPointReferenceImplementation_IsEquivalentToIntegerImplementation(int seed) + { + MutableSpan intData = Create8x8RandomIntData(-200, 200, seed); + MutableSpan floatSrc = intData.ConvertToFloat32MutableSpan(); + + ReferenceImplementations.IntegerReferenceDCT.TransformFDCTInplace(intData); + + MutableSpan dest = new MutableSpan(64); + MutableSpan temp = new MutableSpan(64); + + ReferenceImplementations.fDCT2D_llm(floatSrc, dest, temp, offsetSourceByNeg128: true); + + for (int i = 0; i < 64; i++) + { + float expected = intData[i]; + float actual = dest[i]; + + Assert.Equal(expected, actual, new ApproximateFloatComparer(1f)); + } + } + + [Theory] + [InlineData(42)] + [InlineData(1)] + [InlineData(2)] + public void Fdct_SimdReferenceImplementation_IsEquivalentToFloatingPointReferenceImplementation(int seed) + { + Block classic = new Block() { Data = Create8x8RandomIntData(-200, 200, seed) }; + MutableSpan src = new MutableSpan(classic.Data).ConvertToFloat32MutableSpan(); + + MutableSpan dest1 = new MutableSpan(64); + MutableSpan dest2 = new MutableSpan(64); + MutableSpan temp = new MutableSpan(64); + + ReferenceImplementations.fDCT2D_llm(src, dest1, temp, downscaleBy8: true, offsetSourceByNeg128: false); + ReferenceImplementations.fDCT8x8_llm_sse(src, dest2, temp); + + Assert.Equal(dest1.Data, dest2.Data, new ApproximateFloatComparer(1f)); + } + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Tests/Formats/Jpg/UtilityTestClassBase.cs b/tests/ImageSharp.Tests/Formats/Jpg/UtilityTestClassBase.cs index 55e609a524..20a9d34b18 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/UtilityTestClassBase.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/UtilityTestClassBase.cs @@ -1,21 +1,51 @@ -using System; -using System.Diagnostics; -using System.Runtime.CompilerServices; -using System.Text; +using System.Text; using ImageSharp.Formats; using Xunit.Abstractions; +// ReSharper disable InconsistentNaming namespace ImageSharp.Tests.Formats.Jpg { - public class UtilityTestClassBase + using System; + using System.Collections.Generic; + using System.Diagnostics; + using System.Runtime.CompilerServices; + + /// + /// Utility class to measure the execution of an operation. + /// + public class MeasureFixture { - public UtilityTestClassBase(ITestOutputHelper output) + protected bool EnablePrinting = true; + + protected void Measure(int times, Action action, [CallerMemberName] string operationName = null) + { + if (this.EnablePrinting) this.Output?.WriteLine($"{operationName} X {times} ..."); + Stopwatch sw = Stopwatch.StartNew(); + + for (int i = 0; i < times; i++) + { + action(); + } + + sw.Stop(); + if (this.EnablePrinting) this.Output?.WriteLine($"{operationName} finished in {sw.ElapsedMilliseconds} ms"); + } + + public MeasureFixture(ITestOutputHelper output) { - Output = output; + this.Output = output; } protected ITestOutputHelper Output { get; } + } + + public class UtilityTestClassBase : MeasureFixture + { + public UtilityTestClassBase(ITestOutputHelper output) : base(output) + { + } + // ReSharper disable once InconsistentNaming public static float[] Create8x8FloatData() { @@ -29,10 +59,7 @@ namespace ImageSharp.Tests.Formats.Jpg } return result; } - - - - + // ReSharper disable once InconsistentNaming public static int[] Create8x8IntData() { @@ -47,7 +74,25 @@ namespace ImageSharp.Tests.Formats.Jpg return result; } - internal void Print8x8Data(MutableSpan data) => Print8x8Data(data.Data); + // ReSharper disable once InconsistentNaming + public static int[] Create8x8RandomIntData(int minValue, int maxValue, int seed = 42) + { + Random rnd = new Random(seed); + int[] result = new int[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[i * 8 + j] = rnd.Next(minValue, maxValue); + } + } + return result; + } + + internal static MutableSpan Create8x8RandomFloatData(int minValue, int maxValue, int seed = 42) + => new MutableSpan(Create8x8RandomIntData(minValue, maxValue, seed)).ConvertToFloat32MutableSpan(); + + internal void Print8x8Data(MutableSpan data) => this.Print8x8Data(data.Data); internal void Print8x8Data(T[] data) { @@ -61,10 +106,10 @@ namespace ImageSharp.Tests.Formats.Jpg bld.AppendLine(); } - Output.WriteLine(bld.ToString()); + this.Output.WriteLine(bld.ToString()); } - internal void PrintLinearData(T[] data) => PrintLinearData(new MutableSpan(data), data.Length); + internal void PrintLinearData(T[] data) => this.PrintLinearData(new MutableSpan(data), data.Length); internal void PrintLinearData(MutableSpan data, int count = -1) { @@ -75,21 +120,35 @@ namespace ImageSharp.Tests.Formats.Jpg { bld.Append($"{data[i],3} "); } - Output.WriteLine(bld.ToString()); + this.Output.WriteLine(bld.ToString()); } - protected void Measure(int times, Action action, [CallerMemberName] string operationName = null) + internal struct ApproximateFloatComparer : IEqualityComparer { - Output.WriteLine($"{operationName} X {times} ..."); - Stopwatch sw = Stopwatch.StartNew(); + private readonly float Eps; - for (int i = 0; i < times; i++) + public ApproximateFloatComparer(float eps = 1f) { - action(); + this.Eps = eps; } - sw.Stop(); - Output.WriteLine($"{operationName} finished in {sw.ElapsedMilliseconds} ms"); + public bool Equals(float x, float y) + { + float d = x - y; + + return d > -this.Eps && d < this.Eps; + } + + public int GetHashCode(float obj) + { + throw new InvalidOperationException(); + } + } + + protected void Print(string msg) + { + Debug.WriteLine(msg); + this.Output.WriteLine(msg); } } } \ No newline at end of file diff --git a/tests/ImageSharp.Tests46/Formats/Jpg/ReferenceImplementationsTests.cs b/tests/ImageSharp.Tests46/Formats/Jpg/ReferenceImplementationsTests.cs new file mode 100644 index 0000000000..265d6b5f23 --- /dev/null +++ b/tests/ImageSharp.Tests46/Formats/Jpg/ReferenceImplementationsTests.cs @@ -0,0 +1,144 @@ +// ReSharper disable InconsistentNaming +namespace ImageSharp.Tests.Formats.Jpg +{ + using System.Numerics; + using ImageSharp.Formats; + using Xunit; + using Xunit.Abstractions; + + public class ReferenceImplementationsTests : UtilityTestClassBase + { + public ReferenceImplementationsTests(ITestOutputHelper output) + : base(output) + { + } + + + [Theory] + [InlineData(42)] + [InlineData(1)] + [InlineData(2)] + public void Idct_FloatingPointReferenceImplementation_IsEquivalentToIntegerImplementation(int seed) + { + MutableSpan intData = Create8x8RandomIntData(-200, 200, seed); + MutableSpan floatSrc = intData.ConvertToFloat32MutableSpan(); + + ReferenceImplementations.IntegerReferenceDCT.TransformIDCTInplace(intData); + + MutableSpan dest = new MutableSpan(64); + MutableSpan temp = new MutableSpan(64); + + ReferenceImplementations.iDCT2D_llm(floatSrc, dest, temp); + + for (int i = 0; i < 64; i++) + { + float expected = intData[i]; + float actual = dest[i]; + + Assert.Equal(expected, actual, new ApproximateFloatComparer(1f)); + } + } + + [Theory] + [InlineData(42, 0)] + [InlineData(1, 0)] + [InlineData(2, 0)] + public void IntegerDCT_ForwardThenInverse(int seed, int startAt) + { + MutableSpan original = Create8x8RandomIntData(-200, 200, seed); + + var block = original.AddScalarToAllValues(128); + + ReferenceImplementations.IntegerReferenceDCT.TransformFDCTInplace(block); + + for (int i = 0; i < 64; i++) + { + block[i] /= 8; + } + + ReferenceImplementations.IntegerReferenceDCT.TransformIDCTInplace(block); + + for (int i = startAt; i < 64; i++) + { + float expected = original[i]; + float actual = (float)block[i]; + + Assert.Equal(expected, actual, new ApproximateFloatComparer(3f)); + } + + } + + [Theory] + [InlineData(42, 0)] + [InlineData(1, 0)] + [InlineData(2, 0)] + public void FloatingPointDCT_ReferenceImplementation_ForwardThenInverse(int seed, int startAt) + { + var data = Create8x8RandomIntData(-200, 200, seed); + MutableSpan src = new MutableSpan(data).ConvertToFloat32MutableSpan(); + MutableSpan dest = new MutableSpan(64); + MutableSpan temp = new MutableSpan(64); + + ReferenceImplementations.fDCT2D_llm(src, dest, temp, true); + ReferenceImplementations.iDCT2D_llm(dest, src, temp); + + for (int i = startAt; i < 64; i++) + { + float expected = data[i]; + float actual = (float)src[i]; + + Assert.Equal(expected, actual, new ApproximateFloatComparer(2f)); + } + } + + [Fact] + public void HowMuchIsTheFish() + { + Output.WriteLine(Vector.Count.ToString()); + } + + [Theory] + [InlineData(42)] + [InlineData(1)] + [InlineData(2)] + public void Fdct_FloatingPointReferenceImplementation_IsEquivalentToIntegerImplementation(int seed) + { + MutableSpan intData = Create8x8RandomIntData(-200, 200, seed); + MutableSpan floatSrc = intData.ConvertToFloat32MutableSpan(); + + ReferenceImplementations.IntegerReferenceDCT.TransformFDCTInplace(intData); + + MutableSpan dest = new MutableSpan(64); + MutableSpan temp = new MutableSpan(64); + + ReferenceImplementations.fDCT2D_llm(floatSrc, dest, temp, offsetSourceByNeg128: true); + + for (int i = 0; i < 64; i++) + { + float expected = intData[i]; + float actual = dest[i]; + + Assert.Equal(expected, actual, new ApproximateFloatComparer(1f)); + } + } + + [Theory] + [InlineData(42)] + [InlineData(1)] + [InlineData(2)] + public void Fdct_SimdReferenceImplementation_IsEquivalentToFloatingPointReferenceImplementation(int seed) + { + Block classic = new Block() { Data = Create8x8RandomIntData(-200, 200, seed) }; + MutableSpan src = new MutableSpan(classic.Data).ConvertToFloat32MutableSpan(); + + MutableSpan dest1 = new MutableSpan(64); + MutableSpan dest2 = new MutableSpan(64); + MutableSpan temp = new MutableSpan(64); + + ReferenceImplementations.fDCT2D_llm(src, dest1, temp, downscaleBy8: true, offsetSourceByNeg128: false); + ReferenceImplementations.fDCT8x8_llm_sse(src, dest2, temp); + + Assert.Equal(dest1.Data, dest2.Data, new ApproximateFloatComparer(1f)); + } + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Tests46/ImageSharp.Tests46.csproj b/tests/ImageSharp.Tests46/ImageSharp.Tests46.csproj index 54227f0253..2f03fa24c2 100644 --- a/tests/ImageSharp.Tests46/ImageSharp.Tests46.csproj +++ b/tests/ImageSharp.Tests46/ImageSharp.Tests46.csproj @@ -157,6 +157,7 @@ TestUtilities\TestUtilityExtensions.cs +