From fb25ac95aa5a3a328f6aea463caa69dad311adcf Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Mon, 14 Nov 2016 01:17:46 +0100 Subject: [PATCH] hyper-efficient color copying --- .../Jpg/Components/Block8x8F.Generated.cs | 33 +++ .../Jpg/Components/Block8x8F.Generated.tt | 52 ++++- .../Formats/Jpg/Components/Block8x8F.cs | 214 ++++++++++++------ .../Formats/Jpg/Components/FloatIDCT.cs | 208 ----------------- .../Components/{Span.cs => MutableSpan.cs} | 45 ++-- .../Formats/Jpg/JpegDecoderCore.cs | 4 +- src/ImageSharp46/ImageSharp46.csproj | 3 +- .../{Block8x8Tests.cs => Block8x8FTests.cs} | 189 ++++++++++------ .../Formats/Jpg/DctTests.cs | 6 +- ...enceDCT.cs => ReferenceImplementations.cs} | 195 +++++----------- .../Formats/Jpg/UtilityTestClassBase.cs | 9 +- .../ImageSharp.Tests46.csproj | 4 +- 12 files changed, 446 insertions(+), 516 deletions(-) delete mode 100644 src/ImageSharp46/Formats/Jpg/Components/FloatIDCT.cs rename src/ImageSharp46/Formats/Jpg/Components/{Span.cs => MutableSpan.cs} (58%) rename tests/ImageSharp.Tests46/Formats/Jpg/{Block8x8Tests.cs => Block8x8FTests.cs} (70%) rename tests/ImageSharp.Tests46/Formats/Jpg/{ReferenceDCT.cs => ReferenceImplementations.cs} (62%) diff --git a/src/ImageSharp46/Formats/Jpg/Components/Block8x8F.Generated.cs b/src/ImageSharp46/Formats/Jpg/Components/Block8x8F.Generated.cs index 0e9abecce..1eb15c952 100644 --- a/src/ImageSharp46/Formats/Jpg/Components/Block8x8F.Generated.cs +++ b/src/ImageSharp46/Formats/Jpg/Components/Block8x8F.Generated.cs @@ -3,6 +3,7 @@ using System; using System.Numerics; using System.Runtime.CompilerServices; + namespace ImageSharp.Formats { internal partial struct Block8x8F @@ -19,5 +20,37 @@ namespace ImageSharp.Formats d.V0R.Z = V6L.X; d.V1R.Z = V6L.Y; d.V2R.Z = V6L.Z; d.V3R.Z = V6L.W; d.V4R.Z = V6R.X; d.V5R.Z = V6R.Y; d.V6R.Z = V6R.Z; d.V7R.Z = V6R.W; d.V0R.W = V7L.X; d.V1R.W = V7L.Y; d.V2R.W = V7L.Z; d.V3R.W = V7L.W; d.V4R.W = V7R.X; d.V5R.W = V7R.Y; d.V6R.W = V7R.Z; d.V7R.W = V7R.W; } + + + public void CropInto(float min, float max, ref Block8x8F d) + { + Vector4 minVec = new Vector4(min); + Vector4 maxVec = new Vector4(max); + + d.V0L = Vector4.Max(Vector4.Min(V0L, maxVec), minVec);d.V0R = Vector4.Max(Vector4.Min(V0R, maxVec), minVec); + d.V1L = Vector4.Max(Vector4.Min(V1L, maxVec), minVec);d.V1R = Vector4.Max(Vector4.Min(V1R, maxVec), minVec); + d.V2L = Vector4.Max(Vector4.Min(V2L, maxVec), minVec);d.V2R = Vector4.Max(Vector4.Min(V2R, maxVec), minVec); + d.V3L = Vector4.Max(Vector4.Min(V3L, maxVec), minVec);d.V3R = Vector4.Max(Vector4.Min(V3R, maxVec), minVec); + d.V4L = Vector4.Max(Vector4.Min(V4L, maxVec), minVec);d.V4R = Vector4.Max(Vector4.Min(V4R, maxVec), minVec); + d.V5L = Vector4.Max(Vector4.Min(V5L, maxVec), minVec);d.V5R = Vector4.Max(Vector4.Min(V5R, maxVec), minVec); + d.V6L = Vector4.Max(Vector4.Min(V6L, maxVec), minVec);d.V6R = Vector4.Max(Vector4.Min(V6R, maxVec), minVec); + d.V7L = Vector4.Max(Vector4.Min(V7L, maxVec), minVec);d.V7R = Vector4.Max(Vector4.Min(V7R, maxVec), minVec); + } + + internal void ColorifyInto(ref Block8x8F d) + { + + + d.V0L = Vector4.Max(Vector4.Min(V0L, CMax4), CMin4) + COff4;d.V0R = Vector4.Max(Vector4.Min(V0R, CMax4), CMin4) + COff4; + d.V1L = Vector4.Max(Vector4.Min(V1L, CMax4), CMin4) + COff4;d.V1R = Vector4.Max(Vector4.Min(V1R, CMax4), CMin4) + COff4; + d.V2L = Vector4.Max(Vector4.Min(V2L, CMax4), CMin4) + COff4;d.V2R = Vector4.Max(Vector4.Min(V2R, CMax4), CMin4) + COff4; + d.V3L = Vector4.Max(Vector4.Min(V3L, CMax4), CMin4) + COff4;d.V3R = Vector4.Max(Vector4.Min(V3R, CMax4), CMin4) + COff4; + d.V4L = Vector4.Max(Vector4.Min(V4L, CMax4), CMin4) + COff4;d.V4R = Vector4.Max(Vector4.Min(V4R, CMax4), CMin4) + COff4; + d.V5L = Vector4.Max(Vector4.Min(V5L, CMax4), CMin4) + COff4;d.V5R = Vector4.Max(Vector4.Min(V5R, CMax4), CMin4) + COff4; + d.V6L = Vector4.Max(Vector4.Min(V6L, CMax4), CMin4) + COff4;d.V6R = Vector4.Max(Vector4.Min(V6R, CMax4), CMin4) + COff4; + d.V7L = Vector4.Max(Vector4.Min(V7L, CMax4), CMin4) + COff4;d.V7R = Vector4.Max(Vector4.Min(V7R, CMax4), CMin4) + COff4; + } + + } } diff --git a/src/ImageSharp46/Formats/Jpg/Components/Block8x8F.Generated.tt b/src/ImageSharp46/Formats/Jpg/Components/Block8x8F.Generated.tt index 9ead63b87..390931285 100644 --- a/src/ImageSharp46/Formats/Jpg/Components/Block8x8F.Generated.tt +++ b/src/ImageSharp46/Formats/Jpg/Components/Block8x8F.Generated.tt @@ -9,6 +9,10 @@ using System; using System.Numerics; using System.Runtime.CompilerServices; +<# +char[] coordz = new[] {'X', 'Y', 'Z', 'W'}; +#> + namespace ImageSharp.Formats { internal partial struct Block8x8F @@ -17,8 +21,6 @@ namespace ImageSharp.Formats public void TransposeInto(ref Block8x8F d) { <# - char[] coordz = new[] {'X', 'Y', 'Z', 'W'}; - //StringBuilder bld = new StringBuilder(); PushIndent(" "); for (int i = 0; i < 8; i++) @@ -42,5 +44,51 @@ namespace ImageSharp.Formats //Write(bld.ToString()); #> } + + + public void CropInto(float min, float max, ref Block8x8F d) + { + Vector4 minVec = new Vector4(min); + Vector4 maxVec = new Vector4(max); + + <# + + PushIndent(" "); + + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 2; j++) + { + char side = j == 0 ? 'L' : 'R'; + Write($"d.V{i}{side} = Vector4.Max(Vector4.Min(V{i}{side}, maxVec), minVec);"); + } + WriteLine(""); + } + PopIndent(); + #> + } + + internal void ColorifyInto(ref Block8x8F d) + { + + + <# + + PushIndent(" "); + + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 2; j++) + { + char side = j == 0 ? 'L' : 'R'; + Write($"d.V{i}{side} = Vector4.Max(Vector4.Min(V{i}{side}, CMax4), CMin4) + COff4;"); + } + WriteLine(""); + } + PopIndent(); + #> + } + + } } diff --git a/src/ImageSharp46/Formats/Jpg/Components/Block8x8F.cs b/src/ImageSharp46/Formats/Jpg/Components/Block8x8F.cs index bc18f9843..02fe79f17 100644 --- a/src/ImageSharp46/Formats/Jpg/Components/Block8x8F.cs +++ b/src/ImageSharp46/Formats/Jpg/Components/Block8x8F.cs @@ -1,4 +1,5 @@ using System; +using System.Buffers; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -7,6 +8,9 @@ using System.Runtime.InteropServices; namespace ImageSharp.Formats { + /// + /// DCT code Ported from https://github.com/norishigefukushima/dct_simd + /// internal partial struct Block8x8F { public Vector4 V0L; @@ -35,14 +39,16 @@ namespace ImageSharp.Formats public const int VectorCount = 16; - public const int ScalarCount = VectorCount * 4; + public const int ScalarCount = VectorCount*4; + + private static readonly ArrayPool ScalarArrayPool = ArrayPool.Create(ScalarCount, 50); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe void LoadFrom(Span source) + public unsafe void LoadFrom(MutableSpan source) { fixed (Vector4* ptr = &V0L) { - Marshal.Copy(source.Data, source.Offset, (IntPtr)ptr, ScalarCount); + Marshal.Copy(source.Data, source.Offset, (IntPtr) ptr, ScalarCount); //float* fp = (float*)ptr; //for (int i = 0; i < ScalarCount; i++) //{ @@ -52,37 +58,42 @@ namespace ImageSharp.Formats } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe void CopyTo(Span dest) + public unsafe void CopyTo(MutableSpan dest) { fixed (Vector4* ptr = &V0L) { - Marshal.Copy((IntPtr)ptr, dest.Data, dest.Offset, ScalarCount); - //float* fp = (float*)ptr; - //for (int i = 0; i < ScalarCount; i++) - //{ - // dest[i] = fp[i]; - //} + Marshal.Copy((IntPtr) ptr, dest.Data, dest.Offset, ScalarCount); } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe void LoadFrom(Block8x8F* blockPtr, Span source) + public unsafe void CopyTo(float[] dest) + { + fixed (Vector4* ptr = &V0L) + { + Marshal.Copy((IntPtr) ptr, dest, 0, ScalarCount); + } + } + + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static unsafe void LoadFrom(Block8x8F* blockPtr, MutableSpan source) { - Marshal.Copy(source.Data, source.Offset, (IntPtr)blockPtr, ScalarCount); + Marshal.Copy(source.Data, source.Offset, (IntPtr) blockPtr, ScalarCount); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe void CopyTo(Block8x8F* blockPtr, Span dest) + public static unsafe void CopyTo(Block8x8F* blockPtr, MutableSpan dest) { - Marshal.Copy((IntPtr)blockPtr, dest.Data, dest.Offset, ScalarCount); + Marshal.Copy((IntPtr) blockPtr, dest.Data, dest.Offset, ScalarCount); } - internal unsafe void LoadFrom(Span source) + internal unsafe void LoadFrom(MutableSpan source) { fixed (Vector4* ptr = &V0L) { - float* fp = (float*)ptr; + float* fp = (float*) ptr; for (int i = 0; i < ScalarCount; i++) { fp[i] = source[i]; @@ -90,11 +101,11 @@ namespace ImageSharp.Formats } } - internal unsafe void CopyTo(Span dest) + internal unsafe void CopyTo(MutableSpan dest) { fixed (Vector4* ptr = &V0L) { - float* fp = (float*)ptr; + float* fp = (float*) ptr; for (int i = 0; i < ScalarCount; i++) { dest[i] = (int) fp[i]; @@ -110,16 +121,16 @@ namespace ImageSharp.Formats for (int i = 1; i < 8; i++) { - int i8 = i * 8; + int i8 = i*8; for (int j = 0; j < i; j++) { float tmp = data[i8 + j]; - data[i8 + j] = data[j * 8 + i]; - data[j * 8 + i] = tmp; + data[i8 + j] = data[j*8 + i]; + data[j*8 + i] = tmp; } } } - + } /// @@ -129,7 +140,7 @@ namespace ImageSharp.Formats { fixed (Vector4* sPtr = &V0L) { - float* src = (float*)sPtr; + float* src = (float*) sPtr; fixed (Vector4* dPtr = &destination.V0L) { @@ -137,10 +148,10 @@ namespace ImageSharp.Formats for (int i = 0; i < 8; i++) { - int i8 = i * 8; + int i8 = i*8; for (int j = 0; j < 8; j++) { - dest[j * 8 + i] = src[i8 + j]; + dest[j*8 + i] = src[i8 + j]; } } } @@ -152,26 +163,38 @@ namespace ImageSharp.Formats [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe void TransposeInto(Block8x8F* sourcePtr, Block8x8F* destPtr) { - float* src = (float*)sourcePtr; + float* src = (float*) sourcePtr; float* dest = (float*) destPtr; for (int i = 0; i < 8; i++) { - int i8 = i * 8; + int i8 = i*8; for (int j = 0; j < 8; j++) { - dest[j * 8 + i] = src[i8 + j]; + dest[j*8 + i] = src[i8 + j]; } } } - + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void MultiplyAllInplace(Vector4 s) { - V0L *= s; V0R *= s; V1L *= s; V1R *= s; - V2L *= s; V2R *= s; V3L *= s; V3R *= s; - V4L *= s; V4R *= s; V5L *= s; V5R *= s; - V6L *= s; V6R *= s; V7L *= s; V7R *= s; + V0L *= s; + V0R *= s; + V1L *= s; + V1R *= s; + V2L *= s; + V2R *= s; + V3L *= s; + V3R *= s; + V4L *= s; + V4R *= s; + V5L *= s; + V5R *= s; + V6L *= s; + V6R *= s; + V7L *= s; + V7R *= s; } // ReSharper disable once InconsistentNaming @@ -180,9 +203,9 @@ namespace ImageSharp.Formats TransposeInto(ref temp); temp.iDCT2D8x4_LeftPart(ref dest); temp.iDCT2D8x4_RightPart(ref dest); - + dest.TransposeInto(ref temp); - + temp.iDCT2D8x4_LeftPart(ref dest); temp.iDCT2D8x4_RightPart(ref dest); @@ -229,7 +252,7 @@ namespace ImageSharp.Formats 6: 7: 0.275899 */ - + Vector4 my1 = V1L; Vector4 my7 = V7L; Vector4 mz0 = my1 + my7; @@ -240,14 +263,14 @@ namespace ImageSharp.Formats Vector4 mz1 = my3 + my5; Vector4 mz3 = my1 + my5; - Vector4 mz4 = ((mz0 + mz1) * _1_175876); + Vector4 mz4 = ((mz0 + mz1)*_1_175876); //z0 = y[1] + y[7]; z1 = y[3] + y[5]; z2 = y[3] + y[7]; z3 = y[1] + y[5]; //z4 = (z0 + z1) * r[3]; - mz2 = mz2 * _1_961571 + mz4; - mz3 = mz3 * _0_390181 + mz4; - mz0 = mz0 * _0_899976; - mz1 = mz1 * _2_562915; + mz2 = mz2*_1_961571 + mz4; + mz3 = mz3*_0_390181 + mz4; + mz0 = mz0*_0_899976; + mz1 = mz1*_2_562915; /* -0.899976 @@ -260,10 +283,10 @@ namespace ImageSharp.Formats z3 = z3 * (-r[3] + r[5]) + z4;*/ - Vector4 mb3 = my7 * _0_298631 + mz0 + mz2; - Vector4 mb2 = my5 * _2_053120 + mz1 + mz3; - Vector4 mb1 = my3 * _3_072711 + mz1 + mz2; - Vector4 mb0 = my1 * _1_501321 + mz0 + mz3; + Vector4 mb3 = my7*_0_298631 + mz0 + mz2; + Vector4 mb2 = my5*_2_053120 + mz1 + mz3; + Vector4 mb1 = my3*_3_072711 + mz1 + mz2; + Vector4 mb0 = my1*_1_501321 + mz0 + mz3; /* 0.298631 @@ -278,14 +301,14 @@ namespace ImageSharp.Formats Vector4 my2 = V2L; Vector4 my6 = V6L; - mz4 = (my2 + my6) * _0_541196; + mz4 = (my2 + my6)*_0_541196; Vector4 my0 = V0L; Vector4 my4 = V4L; mz0 = my0 + my4; mz1 = my0 - my4; - mz2 = mz4 + my6 * _1_847759; - mz3 = mz4 + my2 * _0_765367; + mz2 = mz4 + my6*_1_847759; + mz3 = mz4 + my2*_0_765367; my0 = mz0 + mz3; my3 = mz0 - mz3; @@ -301,7 +324,7 @@ namespace ImageSharp.Formats a0 = z0 + z3; a3 = z0 - z3; a1 = z1 + z2; a2 = z1 - z2; */ - + d.V0L = my0 + mb0; d.V7L = my0 - mb0; d.V1L = my1 + mb1; @@ -347,14 +370,14 @@ namespace ImageSharp.Formats Vector4 mz1 = my3 + my5; Vector4 mz3 = my1 + my5; - Vector4 mz4 = ((mz0 + mz1) * _1_175876); + Vector4 mz4 = ((mz0 + mz1)*_1_175876); //z0 = y[1] + y[7]; z1 = y[3] + y[5]; z2 = y[3] + y[7]; z3 = y[1] + y[5]; //z4 = (z0 + z1) * r[3]; - mz2 = mz2 * _1_961571 + mz4; - mz3 = mz3 * _0_390181 + mz4; - mz0 = mz0 * _0_899976; - mz1 = mz1 * _2_562915; + mz2 = mz2*_1_961571 + mz4; + mz3 = mz3*_0_390181 + mz4; + mz0 = mz0*_0_899976; + mz1 = mz1*_2_562915; /* -0.899976 @@ -367,10 +390,10 @@ namespace ImageSharp.Formats z3 = z3 * (-r[3] + r[5]) + z4;*/ - Vector4 mb3 = my7 * _0_298631 + mz0 + mz2; - Vector4 mb2 = my5 * _2_053120 + mz1 + mz3; - Vector4 mb1 = my3 * _3_072711 + mz1 + mz2; - Vector4 mb0 = my1 * _1_501321 + mz0 + mz3; + Vector4 mb3 = my7*_0_298631 + mz0 + mz2; + Vector4 mb2 = my5*_2_053120 + mz1 + mz3; + Vector4 mb1 = my3*_3_072711 + mz1 + mz2; + Vector4 mb0 = my1*_1_501321 + mz0 + mz3; /* 0.298631 @@ -385,14 +408,14 @@ namespace ImageSharp.Formats Vector4 my2 = V2R; Vector4 my6 = V6R; - mz4 = (my2 + my6) * _0_541196; + mz4 = (my2 + my6)*_0_541196; Vector4 my0 = V0R; Vector4 my4 = V4R; mz0 = my0 + my4; mz1 = my0 - my4; - mz2 = mz4 + my6 * _1_847759; - mz3 = mz4 + my2 * _0_765367; + mz2 = mz4 + my6*_1_847759; + mz3 = mz4 + my2*_0_765367; my0 = mz0 + mz3; my3 = mz0 - mz3; @@ -425,7 +448,7 @@ namespace ImageSharp.Formats for(i = 0;i < 8;i++){ x[i] *= 0.353554f; } */ } - + internal static void SuchIDCT(ref Block block) { Block8x8F source = new Block8x8F(); @@ -433,7 +456,7 @@ namespace ImageSharp.Formats Block8x8F dest = new Block8x8F(); Block8x8F temp = new Block8x8F(); - + source.IDCTInto(ref dest, ref temp); dest.CopyTo(block.Data); } @@ -466,7 +489,7 @@ namespace ImageSharp.Formats { fixed (Block8x8F* p = &this) { - float* fp = (float*)p; + float* fp = (float*) p; fp[idx] = value; } } @@ -482,7 +505,7 @@ namespace ImageSharp.Formats [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static unsafe void SetScalarAt(Block8x8F* blockPtr, int idx, float value) { - float* fp = (float*)blockPtr; + float* fp = (float*) blockPtr; fp[idx] = value; } @@ -502,7 +525,28 @@ namespace ImageSharp.Formats CopyTo(legacyBlock.Data); } - internal unsafe void CopyColorsTo(Span buffer, int stride) + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static byte ToColorByte(float c) + { + if (c < -128) + { + return 0; + } + else if (c > 127) + { + return 255; + } + else + { + c += 128; + return (byte) c; + } + } + + + + internal unsafe void CopyColorsTo(MutableSpan buffer, int stride) { fixed (Block8x8F* p = &this) { @@ -510,8 +554,8 @@ namespace ImageSharp.Formats for (int y = 0; y < 8; y++) { - int y8 = y * 8; - int yStride = y * stride; + int y8 = y*8; + int yStride = y*stride; for (int x = 0; x < 8; x++) { @@ -531,13 +575,43 @@ namespace ImageSharp.Formats } buffer[yStride + x] = (byte) c; - - //dst[yStride + x + offset] = (byte)c; } } } + } + + private static readonly Vector4 CMin4 = new Vector4(-128f); + private static readonly Vector4 CMax4 = new Vector4(127f); + private static readonly Vector4 COff4 = new Vector4(128f); + + /// + /// Level shift by +128, clip to [0, 255], and write to buffer. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal unsafe void CopyColorsTo( + MutableSpan buffer, + int stride, + Block8x8F* temp) + { + ColorifyInto(ref *temp); + float* src = (float*) temp; + for (int i = 0; i < 8; i++) + { + buffer[0] = (byte) src[0]; + buffer[1] = (byte) src[1]; + buffer[2] = (byte) src[2]; + buffer[3] = (byte) src[3]; + buffer[4] = (byte) src[4]; + buffer[5] = (byte) src[5]; + buffer[6] = (byte) src[6]; + buffer[7] = (byte) src[7]; + buffer.AddOffset(stride); + src += 8; + } } + + } } \ No newline at end of file diff --git a/src/ImageSharp46/Formats/Jpg/Components/FloatIDCT.cs b/src/ImageSharp46/Formats/Jpg/Components/FloatIDCT.cs deleted file mode 100644 index 5f2bd68b9..000000000 --- a/src/ImageSharp46/Formats/Jpg/Components/FloatIDCT.cs +++ /dev/null @@ -1,208 +0,0 @@ -using System; -using System.Buffers; - -namespace ImageSharp.Formats -{ - internal class FloatIDCT - { - //private float[] _temp = new float[64]; - - // Cosine matrix and transposed cosine matrix - private static readonly float[] c = buildC(); - private static readonly float[] cT = buildCT(); - - internal FloatIDCT() - { -#if DYNAMIC_IDCT - dynamicIDCT = dynamicIDCT ?? EmitIDCT(); -#endif - } - - /// - /// Precomputes cosine terms in A.3.3 of - /// http://www.w3.org/Graphics/JPEG/itu-t81.pdf - /// - /// Closely follows the term precomputation in the - /// Java Advanced Imaging library. - /// - private static float[] buildC() - { - float[] c = new float[64]; - - for (int i = 0; i < 8; i++) // i == u or v - { - for (int j = 0; j < 8; j++) // j == x or y - { - c[i*8 + j] = i == 0 ? - 0.353553391f : /* 1 / SQRT(8) */ - (float)(0.5 * Math.Cos(((2.0 * j + 1) * i * Math.PI) / 16.0)); - } - } - - return c; - } - private static float[] buildCT() - { - // Transpose i,k <-- j,i - float[] cT = new float[64]; - for (int i = 0; i < 8; i++) - for (int j = 0; j < 8; j++) - cT[j * 8 + i] = c[i * 8 + j]; - return cT; - } - - public static void SetValueClipped(byte[,] arr, int i, int j, float val) - { - // Clip into the 0...255 range & round - arr[i, j] = val < 0 ? (byte)0 - : val > 255 ? (byte)255 - : (byte)(val + 0.5); - } - - public static void Transform(ref Block block) => FastIDCT(block.Data); - - /// See figure A.3.3 IDCT (informative) on A-5. - /// http://www.w3.org/Graphics/JPEG/itu-t81.pdf - public static void FastIDCT(int[] output) - { - //byte[,] output = new byte[8, 8]; - //int[] output = new int[64]; - - float[] _temp = ArrayPool.Shared.Rent(64); - - float[] input = ArrayPool.Shared.Rent(64); - - for (int i = 0; i < output.Length; i++) - { - input[i] = output[i]; - } - - float temp, val = 0; - int idx = 0; - for (int i = 0; i < 8; i++) - { - int i8 = i * 8; - for (int j = 0; j < 8; j++) - { - val = 0; - - for (int k = 0; k < 8; k++) - { - val += input[i8 + k] * c[k*8 + j]; - } - - _temp[idx++] = val; - } - } - for (int i = 0; i < 8; i++) - { - int i8 = i*8; - for (int j = 0; j < 8; j++) - { - temp = 128f; - - for (int k = 0; k < 8; k++) - { - temp += cT[i*8 + k] * _temp[k * 8 + j]; - } - - if (temp < 0) output[i8 + j] = 0; - else if (temp > 255) output[i8+ j] = 255; - else output[i8 + j] = (int)(temp + 0.5); // Implements rounding - } - } - - ArrayPool.Shared.Return(input, true); - ArrayPool.Shared.Return(_temp, true); - } - - - -#if DYNAMIC_IDCT - -/// -/// Generates a pure-IL nonbranching stream of instructions -/// that perform the inverse DCT. Relies on helper function -/// SetValueClipped. -/// -/// A delegate to the DynamicMethod - private static IDCTFunc EmitIDCT() - { - Type[] args = { typeof(float[]), typeof(float[]), typeof(byte[,]) }; - - DynamicMethod idctMethod = new DynamicMethod("dynamicIDCT", - null, // no return type - args); // input arrays - - ILGenerator il = idctMethod.GetILGenerator(); - - int idx = 0; - - for (int i = 0; i < 8; i++) - { - for (int j = 0; j < 8; j++) - { - il.Emit(OpCodes.Ldarg_1); // 1 {temp} - il.Emit(OpCodes.Ldc_I4_S, (short)idx++); // 3 {temp, idx} - - for (int k = 0; k < 8; k++) - { - il.Emit(OpCodes.Ldarg_0); // {in} - il.Emit(OpCodes.Ldc_I4_S, (short)(i * 8 + k)); // {in,idx} - il.Emit(OpCodes.Ldelem_R4); // {in[idx]} - il.Emit(OpCodes.Ldc_R4, c[k, j]); // {in[idx],c[k,j]} - il.Emit(OpCodes.Mul); // {in[idx]*c[k,j]} - if (k != 0) il.Emit(OpCodes.Add); - } - - il.Emit(OpCodes.Stelem_R4); // {} - } - } - - var meth = typeof(DCT).GetMethod("SetValueClipped", - BindingFlags.Static | BindingFlags.Public, null, - CallingConventions.Standard, - new Type[] { - typeof(byte[,]), // arr - typeof(int), // i - typeof(int), // j - typeof(float) } // val - , null); - - for (int i = 0; i < 8; i++) - { - for (int j = 0; j < 8; j++) - { - il.Emit(OpCodes.Ldarg_2); // {output} - il.Emit(OpCodes.Ldc_I4_S, (short)i); // {output,i} - il.Emit(OpCodes.Ldc_I4_S, (short)j); // X={output,i,j} - - il.Emit(OpCodes.Ldc_R4, 128.0f); // {X,128.0f} - - for (int k = 0; k < 8; k++) - { - il.Emit(OpCodes.Ldarg_1); // {X,temp} - il.Emit(OpCodes.Ldc_I4_S, - (short)(k * 8 + j)); // {X,temp,idx} - il.Emit(OpCodes.Ldelem_R4); // {X,temp[idx]} - il.Emit(OpCodes.Ldc_R4, cT[i, k]); // {X,temp[idx],cT[i,k]} - il.Emit(OpCodes.Mul); // {X,in[idx]*c[k,j]} - il.Emit(OpCodes.Add); - } - - il.EmitCall(OpCodes.Call, meth, null); - } - } - - il.Emit(OpCodes.Ret); - - return (IDCTFunc)idctMethod.CreateDelegate(typeof(IDCTFunc)); - } - - private delegate void IDCTFunc(float[] input, float[] temp, byte[,] output); - private static IDCTFunc dynamicIDCT = null; -#endif - - - } -} \ No newline at end of file diff --git a/src/ImageSharp46/Formats/Jpg/Components/Span.cs b/src/ImageSharp46/Formats/Jpg/Components/MutableSpan.cs similarity index 58% rename from src/ImageSharp46/Formats/Jpg/Components/Span.cs rename to src/ImageSharp46/Formats/Jpg/Components/MutableSpan.cs index 2e19c18ca..0cb11690b 100644 --- a/src/ImageSharp46/Formats/Jpg/Components/Span.cs +++ b/src/ImageSharp46/Formats/Jpg/Components/MutableSpan.cs @@ -4,20 +4,26 @@ using System.Runtime.CompilerServices; namespace ImageSharp.Formats { - internal struct Span + /// + /// Like corefxlab Span, but with an AddOffset() method for efficiency. + /// TODO: When Span will be official, consider replacing this class! + /// + /// + /// + internal struct MutableSpan { public T[] Data; public int Offset; public int TotalCount => Data.Length - Offset; - public Span(int size, int offset = 0) + public MutableSpan(int size, int offset = 0) { Data = new T[size]; Offset = offset; } - public Span(T[] data, int offset = 0) + public MutableSpan(T[] data, int offset = 0) { Data = data; Offset = offset; @@ -30,26 +36,13 @@ namespace ImageSharp.Formats } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public Span Slice(int offset) + public MutableSpan Slice(int offset) { - return new Span(Data, Offset + offset); - } - - public static implicit operator Span(T[] data) => new Span(data, 0); - - private static readonly ArrayPool Pool = ArrayPool.Create(128, 10); - - public static Span RentFromPool(int size, int offset = 0) - { - return new Span(Pool.Rent(size), offset); - } - - public void ReturnToPool() - { - Pool.Return(Data, true); - Data = null; + return new MutableSpan(Data, Offset + offset); } + public static implicit operator MutableSpan(T[] data) => new MutableSpan(data, 0); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void AddOffset(int offset) { @@ -57,10 +50,12 @@ namespace ImageSharp.Formats } } - internal static class SpanExtensions + internal static class MutableSpanExtensions { + public static MutableSpan Slice(this T[] array, int offset) => new MutableSpan(array, offset); + [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void SaveTo(this Span data, ref Vector4 v) + public static void SaveTo(this MutableSpan data, ref Vector4 v) { v.X = data[0]; v.Y = data[1]; @@ -69,7 +64,7 @@ namespace ImageSharp.Formats } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void SaveTo(this Span data, ref Vector4 v) + public static void SaveTo(this MutableSpan data, ref Vector4 v) { v.X = data[0]; v.Y = data[1]; @@ -78,7 +73,7 @@ namespace ImageSharp.Formats } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void LoadFrom(this Span data, ref Vector4 v) + public static void LoadFrom(this MutableSpan data, ref Vector4 v) { data[0] = v.X; data[1] = v.Y; @@ -87,7 +82,7 @@ namespace ImageSharp.Formats } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void LoadFrom(this Span data, ref Vector4 v) + public static void LoadFrom(this MutableSpan data, ref Vector4 v) { data[0] = (int)v.X; data[1] = (int)v.Y; diff --git a/src/ImageSharp46/Formats/Jpg/JpegDecoderCore.cs b/src/ImageSharp46/Formats/Jpg/JpegDecoderCore.cs index 3542fddcd..a2a570aed 100644 --- a/src/ImageSharp46/Formats/Jpg/JpegDecoderCore.cs +++ b/src/ImageSharp46/Formats/Jpg/JpegDecoderCore.cs @@ -1886,7 +1886,9 @@ namespace ImageSharp.Formats // Level shift by +128, clip to [0, 255], and write to dst. - temp1->CopyColorsTo(new Span(dst, offset), stride); + + //temp1->CopyColorsPlz(new MutableSpan(dst, offset), stride); + temp1->CopyColorsTo(new MutableSpan(dst, offset), stride, temp2); } private void ProcessScanImpl(int i, ref Scan currentScan, Scan[] scan, ref int totalHv) diff --git a/src/ImageSharp46/ImageSharp46.csproj b/src/ImageSharp46/ImageSharp46.csproj index 303b12c72..54cc9fb97 100644 --- a/src/ImageSharp46/ImageSharp46.csproj +++ b/src/ImageSharp46/ImageSharp46.csproj @@ -236,11 +236,10 @@ - - + diff --git a/tests/ImageSharp.Tests46/Formats/Jpg/Block8x8Tests.cs b/tests/ImageSharp.Tests46/Formats/Jpg/Block8x8FTests.cs similarity index 70% rename from tests/ImageSharp.Tests46/Formats/Jpg/Block8x8Tests.cs rename to tests/ImageSharp.Tests46/Formats/Jpg/Block8x8FTests.cs index 24824774d..814eafd87 100644 --- a/tests/ImageSharp.Tests46/Formats/Jpg/Block8x8Tests.cs +++ b/tests/ImageSharp.Tests46/Formats/Jpg/Block8x8FTests.cs @@ -1,7 +1,8 @@ // Uncomment this to turn unit tests into benchmarks: -#define BENCHMARKING +//#define BENCHMARKING using System; +using System.Collections.Generic; using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; @@ -10,11 +11,11 @@ using System.Text; using ImageSharp.Formats; using Xunit; using Xunit.Abstractions; +// ReSharper disable InconsistentNaming namespace ImageSharp.Tests.Formats.Jpg { - // ReSharper disable once InconsistentNaming - public class Block8x8Tests : UtilityTestClassBase + public class Block8x8FTests : UtilityTestClassBase { #if BENCHMARKING public const int Times = 1000000; @@ -22,7 +23,7 @@ namespace ImageSharp.Tests.Formats.Jpg public const int Times = 1; #endif - public Block8x8Tests(ITestOutputHelper output) : base(output) + public Block8x8FTests(ITestOutputHelper output) : base(output) { } @@ -65,7 +66,7 @@ namespace ImageSharp.Tests.Formats.Jpg sum += Block8x8F.GetScalarAt(&block, i); } }); - Assert.Equal(sum, 64f * 63f * 0.5f); + Assert.Equal(sum, 64f*63f*0.5f); } [Fact] @@ -73,7 +74,7 @@ namespace ImageSharp.Tests.Formats.Jpg { float sum = 0; - + Measure(Times, () => { //Block8x8F block = new Block8x8F(); @@ -88,7 +89,7 @@ namespace ImageSharp.Tests.Formats.Jpg sum += block[i]; } }); - Assert.Equal(sum, 64f * 63f * 0.5f); + Assert.Equal(sum, 64f*63f*0.5f); } [Fact] @@ -109,7 +110,7 @@ namespace ImageSharp.Tests.Formats.Jpg }); Assert.Equal(data, mirror); - //PrintLinearData((Span)mirror); + //PrintLinearData((MutableSpan)mirror); } [Fact] @@ -130,7 +131,7 @@ namespace ImageSharp.Tests.Formats.Jpg }); Assert.Equal(data, mirror); - //PrintLinearData((Span)mirror); + //PrintLinearData((MutableSpan)mirror); } [Fact] @@ -151,18 +152,18 @@ namespace ImageSharp.Tests.Formats.Jpg }); Assert.Equal(data, mirror); - //PrintLinearData((Span)mirror); + //PrintLinearData((MutableSpan)mirror); } [Fact] public void TransposeInplace() { float[] expected = Create8x8FloatData(); - ReferenceDCT.Transpose8x8(expected); + ReferenceImplementations.Transpose8x8(expected); Block8x8F buffer = new Block8x8F(); buffer.LoadFrom(Create8x8FloatData()); - + buffer.TransposeInplace(); float[] actual = new float[64]; @@ -175,7 +176,7 @@ namespace ImageSharp.Tests.Formats.Jpg public void TranposeInto_PinningImpl() { float[] expected = Create8x8FloatData(); - ReferenceDCT.Transpose8x8(expected); + ReferenceImplementations.Transpose8x8(expected); Block8x8F source = new Block8x8F(); source.LoadFrom(Create8x8FloatData()); @@ -186,14 +187,14 @@ namespace ImageSharp.Tests.Formats.Jpg float[] actual = new float[64]; dest.CopyTo(actual); - Assert.Equal(expected, actual); + Assert.Equal(expected, actual); } [Fact] public void TransposeInto() { float[] expected = Create8x8FloatData(); - ReferenceDCT.Transpose8x8(expected); + ReferenceImplementations.Transpose8x8(expected); Block8x8F source = new Block8x8F(); source.LoadFrom(Create8x8FloatData()); @@ -208,21 +209,21 @@ namespace ImageSharp.Tests.Formats.Jpg } [Fact] - public void Buffer8x8_TransposeInto_GeneratorTest() + public void TransposeInto_CodeGeneratorTest() { char[] coordz = new[] {'X', 'Y', 'Z', 'W'}; StringBuilder bld = new StringBuilder(); for (int i = 0; i < 8; i++) { - char destCoord = coordz[i % 4]; - char destSide = (i / 4) % 2 == 0 ? 'L' : 'R'; + char destCoord = coordz[i%4]; + char destSide = (i/4)%2 == 0 ? 'L' : 'R'; for (int j = 0; j < 8; j++) { - char srcCoord = coordz[j % 4]; - char srcSide = (j / 4) % 2 == 0 ? 'L' : 'R'; - + char srcCoord = coordz[j%4]; + char srcSide = (j/4)%2 == 0 ? 'L' : 'R'; + string expression = $"d.V{j}{destSide}.{destCoord} = V{i}{srcSide}.{srcCoord}; "; bld.Append(expression); } @@ -237,7 +238,7 @@ namespace ImageSharp.Tests.Formats.Jpg public unsafe void TransposeInto_WithPointers() { float[] expected = Create8x8FloatData(); - ReferenceDCT.Transpose8x8(expected); + ReferenceImplementations.Transpose8x8(expected); Block8x8F source = new Block8x8F(); source.LoadFrom(Create8x8FloatData()); @@ -322,7 +323,7 @@ namespace ImageSharp.Tests.Formats.Jpg Output.WriteLine($"TransposeInto_WithPointers_Benchmark finished in {sw.ElapsedMilliseconds} ms"); } } - + } @@ -331,9 +332,9 @@ namespace ImageSharp.Tests.Formats.Jpg { float[] sourceArray = Create8x8FloatData(); float[] expectedDestArray = new float[64]; - - ReferenceDCT.iDCT2D8x4_32f(sourceArray, expectedDestArray); - + + ReferenceImplementations.iDCT2D8x4_32f(sourceArray, expectedDestArray); + Block8x8F source = new Block8x8F(); source.LoadFrom(sourceArray); @@ -354,11 +355,11 @@ namespace ImageSharp.Tests.Formats.Jpg [Fact] public void iDCT2D8x4_RightPart() { - Span sourceArray = Create8x8FloatData(); - Span expectedDestArray = new float[64]; + MutableSpan sourceArray = Create8x8FloatData(); + MutableSpan expectedDestArray = new float[64]; + + ReferenceImplementations.iDCT2D8x4_32f(sourceArray.Slice(4), expectedDestArray.Slice(4)); - ReferenceDCT.iDCT2D8x4_32f(sourceArray.Slice(4), expectedDestArray.Slice(4)); - Block8x8F source = new Block8x8F(); source.LoadFrom(sourceArray); @@ -376,6 +377,23 @@ namespace ImageSharp.Tests.Formats.Jpg Assert.Equal(expectedDestArray.Data, actualDestArray); } + private struct ApproximateFloatComparer : IEqualityComparer + { + private const float Eps = 0.0001f; + + public bool Equals(float x, float y) + { + float d = x - y; + + return d > -Eps && d < Eps; + } + + public int GetHashCode(float obj) + { + throw new InvalidOperationException(); + } + } + [Fact] public void IDCTInto() { @@ -383,8 +401,10 @@ namespace ImageSharp.Tests.Formats.Jpg float[] expectedDestArray = new float[64]; float[] tempArray = new float[64]; - ReferenceDCT.iDCT8x8_llm_sse(sourceArray, expectedDestArray, tempArray); - + ReferenceImplementations.iDCT2D_llm(sourceArray, expectedDestArray, tempArray); + + //ReferenceImplementations.iDCT8x8_llm_sse(sourceArray, expectedDestArray, tempArray); + Block8x8F source = new Block8x8F(); source.LoadFrom(sourceArray); @@ -399,52 +419,93 @@ namespace ImageSharp.Tests.Formats.Jpg Print8x8Data(expectedDestArray); Output.WriteLine("**************"); Print8x8Data(actualDestArray); - Assert.Equal(expectedDestArray, actualDestArray); + Assert.Equal(expectedDestArray, actualDestArray, new ApproximateFloatComparer()); + Assert.Equal(expectedDestArray, actualDestArray, new ApproximateFloatComparer()); } - private unsafe void CopyColorsTo_ReferenceImpl(ref Block8x8F block, Span buffer, int stride) + + [Fact] + public unsafe void CopyColorsTo() { - fixed (Block8x8F* p = &block) - { - float* b = (float*)p; + var data = Create8x8FloatData(); + Block8x8F block = new Block8x8F(); + block.LoadFrom(data); + block.MultiplyAllInplace(new Vector4(5, 5, 5, 5)); - for (int y = 0; y < 8; y++) - { - int y8 = y * 8; - int yStride = y * stride; + int stride = 256; + int height = 42; + int offset = height*10 + 20; - for (int x = 0; x < 8; x++) - { - float c = b[y8 + x]; - - if (c < -128) - { - c = 0; - } - else if (c > 127) - { - c = 255; - } - else - { - c += 128; - } - - buffer[yStride + x] = (byte)c; - } - } + byte[] colorsExpected = new byte[stride*height]; + byte[] colorsActual = new byte[stride*height]; + + Block8x8F temp = new Block8x8F(); + + ReferenceImplementations.CopyColorsTo(ref block, new MutableSpan(colorsExpected, offset), stride); + + block.CopyColorsTo(new MutableSpan(colorsActual, offset), stride, &temp); + + //Output.WriteLine("******* EXPECTED: *********"); + //PrintLinearData(colorsExpected); + //Output.WriteLine("******** ACTUAL: **********"); + + Assert.Equal(colorsExpected, colorsActual); + } + + [Fact] + public void CropInto() + { + Block8x8F block = new Block8x8F(); + block.LoadFrom(Create8x8FloatData()); + + Block8x8F dest = new Block8x8F(); + block.CropInto(10, 20, ref dest); + + float[] array = new float[64]; + dest.CopyTo(array); + PrintLinearData(array); + foreach (float val in array) + { + Assert.InRange(val, 10, 20); } + } + private static float[] Create8x8ColorCropTestData() + { + float[] result = new float[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[i * 8 + j] = -300 + i * 100 + j * 10; + } + } + return result; } [Fact] - public void CopyColorsTo() + public void ColorifyInto() { - var data = Create8x8FloatData(); Block8x8F block = new Block8x8F(); - block.LoadFrom(data); + var input = Create8x8ColorCropTestData(); + block.LoadFrom(input); + Output.WriteLine("Input:"); + PrintLinearData(input); + + + Block8x8F dest = new Block8x8F(); + block.ColorifyInto(ref dest); + float[] array = new float[64]; + dest.CopyTo(array); + Output.WriteLine("Result:"); + PrintLinearData(array); + foreach (float val in array) + { + Assert.InRange(val, 0, 255); + } } + } } \ No newline at end of file diff --git a/tests/ImageSharp.Tests46/Formats/Jpg/DctTests.cs b/tests/ImageSharp.Tests46/Formats/Jpg/DctTests.cs index 3d3786ef3..77f9fd98c 100644 --- a/tests/ImageSharp.Tests46/Formats/Jpg/DctTests.cs +++ b/tests/ImageSharp.Tests46/Formats/Jpg/DctTests.cs @@ -32,9 +32,9 @@ namespace ImageSharp.Tests.Formats.Jpg { var data = Create8x8FloatData(); - Span result = new Span(64); + MutableSpan result = new MutableSpan(64); - ReferenceDCT.Transpose8x8(data, result); + ReferenceImplementations.Transpose8x8(data, result); Print8x8Data(result.Data); } @@ -44,7 +44,7 @@ namespace ImageSharp.Tests.Formats.Jpg { var data = Create8x8FloatData(); - ReferenceDCT.Transpose8x8(data); + ReferenceImplementations.Transpose8x8(data); Print8x8Data(data); } diff --git a/tests/ImageSharp.Tests46/Formats/Jpg/ReferenceDCT.cs b/tests/ImageSharp.Tests46/Formats/Jpg/ReferenceImplementations.cs similarity index 62% rename from tests/ImageSharp.Tests46/Formats/Jpg/ReferenceDCT.cs rename to tests/ImageSharp.Tests46/Formats/Jpg/ReferenceImplementations.cs index de9077e46..8cbc267af 100644 --- a/tests/ImageSharp.Tests46/Formats/Jpg/ReferenceDCT.cs +++ b/tests/ImageSharp.Tests46/Formats/Jpg/ReferenceImplementations.cs @@ -2,21 +2,19 @@ using System.Buffers; using System.Numerics; using System.Runtime.CompilerServices; +using ImageSharp.Formats; -namespace ImageSharp.Formats +// ReSharper disable InconsistentNaming + +namespace ImageSharp.Tests.Formats.Jpg { /// - /// Ported from https://github.com/norishigefukushima/dct_simd - /// In this form, its Slow in C# - /// Used as a reference implementation in test cases! + /// This class contains simplified (unefficient) reference implementations so we can verify actual ones in unit tests + /// DCT code Ported from https://github.com/norishigefukushima/dct_simd /// - // ReSharper disable once InconsistentNaming - public static class ReferenceDCT + public static class ReferenceImplementations { - private static readonly ArrayPool FloatArrayPool = ArrayPool.Create(Block.BlockSize, 50); - - - internal static void Transpose8x8(Span data) + internal static void Transpose8x8(MutableSpan data) { for (int i = 1; i < 8; i++) { @@ -30,7 +28,7 @@ namespace ImageSharp.Formats } } - internal static void Transpose8x8(Span src, Span dest) + internal static void Transpose8x8(MutableSpan src, MutableSpan dest) { for (int i = 0; i < 8; i++) { @@ -40,24 +38,9 @@ namespace ImageSharp.Formats dest[j*8 + i] = src[i8 + j]; } } - - //Matrix4x4 a11 = Load(src, 0, 0); - //Matrix4x4 a12 = Load(src, 4, 0); - //Matrix4x4 a21 = Load(src, 0, 4); - //Matrix4x4 a22 = Load(src, 4, 4); - - //a11 = Matrix4x4.Transpose(a11); - //a12 = Matrix4x4.Transpose(a12); - //a21 = Matrix4x4.Transpose(a21); - //a22 = Matrix4x4.Transpose(a22); - - //Store(a11, dest, 0, 0); - //Store(a21, dest, 4, 0); - //Store(a12, dest, 0, 4); - //Store(a22, dest, 4, 4); } - internal static void iDCT1Dllm_32f(Span y, Span x) + internal static void iDCT1Dllm_32f(MutableSpan y, MutableSpan x) { float a0, a1, a2, a3, b0, b1, b2, b3; float z0, z1, z2, z3, z4; @@ -107,7 +90,7 @@ namespace ImageSharp.Formats x[4] = a3 - b3; } - internal static void iDCT2D_llm(Span s, Span d, Span temp) + internal static void iDCT2D_llm(MutableSpan s, MutableSpan d, MutableSpan temp) { int j; @@ -130,59 +113,24 @@ namespace ImageSharp.Formats d[j] *= 0.125f; } } + - internal static void IDCT(ref Block block) - { - Span src = Span.RentFromPool(64); - - for (int i = 0; i < 64; i++) - { - src[i] = block[i]; - } - - Span dest = Span.RentFromPool(64); - Span temp = Span.RentFromPool(64); - - //iDCT2D_llm(src, dest, temp); - //iDCT8x8GT(src, dest); - iDCT8x8_llm_sse(src, dest, temp); - - for (int i = 0; i < 64; i++) - { - block[i] = (int) (dest[i] + 0.5f); - } - - src.ReturnToPool(); - dest.ReturnToPool(); - temp.ReturnToPool(); - } - - internal static void iDCT8x8GT(Span s, Span d) - { - idct81d_sse_GT(s, d); - - Transpose8x8(d); - - idct81d_sse_GT(d, d); - - Transpose8x8(d); - } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector4 _mm_load_ps(Span src, int offset) + private static Vector4 _mm_load_ps(MutableSpan src, int offset) { src = src.Slice(offset); return new Vector4(src[0], src[1], src[2], src[3]); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector4 _mm_load_ps(Span src) + private static Vector4 _mm_load_ps(MutableSpan src) { return new Vector4(src[0], src[1], src[2], src[3]); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void _mm_store_ps(Span dest, int offset, Vector4 src) + private static void _mm_store_ps(MutableSpan dest, int offset, Vector4 src) { dest = dest.Slice(offset); dest[0] = src.X; @@ -192,79 +140,14 @@ namespace ImageSharp.Formats } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void _mm_store_ps(Span dest, Vector4 src) + private static void _mm_store_ps(MutableSpan dest, Vector4 src) { dest[0] = src.X; dest[1] = src.Y; dest[2] = src.Z; dest[3] = src.W; } - - - internal static void idct81d_sse_GT(Span src, Span dst) - { - Vector4 c1414 = new Vector4(1.4142135623731f); - Vector4 c0250 = new Vector4(0.25f); - Vector4 c0353 = new Vector4(0.353553390593274f); - Vector4 c0707 = new Vector4(0.707106781186547f); - - for (int i = 0; i < 2; i++) - { - Vector4 ms0 = _mm_load_ps(src, 0); - Vector4 ms1 = _mm_load_ps(src, 8); - Vector4 ms2 = _mm_load_ps(src, 16); - Vector4 ms3 = _mm_load_ps(src, 24); - Vector4 ms4 = _mm_load_ps(src, 32); - Vector4 ms5 = _mm_load_ps(src, 40); - Vector4 ms6 = _mm_load_ps(src, 48); - Vector4 ms7 = _mm_load_ps(src, 56); - - Vector4 mx00 = (c1414*ms0); - - Vector4 mx01 = ((new Vector4(1.38703984532215f)*ms1) + (new Vector4(0.275899379282943f)*ms7)); - Vector4 mx02 = ((new Vector4(1.30656296487638f)*ms2) + (new Vector4(0.541196100146197f)*ms6)); - Vector4 mx03 = ((new Vector4(1.17587560241936f)*ms3) + (new Vector4(0.785694958387102f)*ms5)); - - Vector4 mx04 = (c1414*ms4); - - Vector4 mx05 = ((new Vector4(-0.785694958387102f)*ms3) + (new Vector4(+1.17587560241936f)*ms5)); - Vector4 mx06 = ((new Vector4(0.541196100146197f)*ms2) + (new Vector4(-1.30656296487638f)*ms6)); - Vector4 mx07 = ((new Vector4(-0.275899379282943f)*ms1) + (new Vector4(1.38703984532215f)*ms7)); - Vector4 mx09 = (mx00 + mx04); - Vector4 mx0a = (mx01 + mx03); - - Vector4 mx0b = (c1414*mx02); - - Vector4 mx0c = (mx00 - mx04); - Vector4 mx0d = (mx01 - mx03); - - Vector4 mx0e = (c0353*(mx09 - mx0b)); - Vector4 mx0f = (c0353*(mx0c - mx0d)); - Vector4 mx10 = (c0353*(mx0c - mx0d)); - Vector4 mx11 = (c1414*mx06); - - Vector4 mx12 = (mx05 + mx07); - - Vector4 mx13 = (mx05 - mx07); - - Vector4 mx14 = (c0353*(mx11 + mx12)); - Vector4 mx15 = (c0353*(mx11 - mx12)); - Vector4 mx16 = (new Vector4(0.5f)*mx13); - - _mm_store_ps(dst, 0, ((c0250 + (mx09 + mx0b))*(c0353*mx0a))); - _mm_store_ps(dst, 8, (c0707*(mx0f + mx15))); - _mm_store_ps(dst, 16, (c0707*(mx0f - mx15))); - _mm_store_ps(dst, 24, (c0707*(mx0e + mx16))); - _mm_store_ps(dst, 32, (c0707*(mx0e - mx16))); - _mm_store_ps(dst, 40, (c0707*(mx10 - mx14))); - _mm_store_ps(dst, 48, (c0707*(mx10 + mx14))); - - _mm_store_ps(dst, 56, ((c0250*(mx09 + mx0b)) - (c0353*mx0a))); - - dst = dst.Slice(4); - src = src.Slice(4); - } - } + private static readonly Vector4 _1_175876 = new Vector4(1.175876f); private static readonly Vector4 _1_961571 = new Vector4(-1.961571f); @@ -279,7 +162,7 @@ namespace ImageSharp.Formats private static readonly Vector4 _1_847759 = new Vector4(-1.847759f); private static readonly Vector4 _0_765367 = new Vector4(0.765367f); - internal static void iDCT2D8x4_32f(Span y, Span x) + internal static void iDCT2D8x4_32f(MutableSpan y, MutableSpan x) { /* float a0,a1,a2,a3,b0,b1,b2,b3; float z0,z1,z2,z3,z4; float r[8]; int i; @@ -392,7 +275,7 @@ namespace ImageSharp.Formats */ } - internal static void iDCT8x8_llm_sse(Span s, Span d, Span temp) + internal static void iDCT8x8_llm_sse(MutableSpan s, MutableSpan d, MutableSpan temp) { Transpose8x8(s, temp); iDCT2D8x4_32f(temp, d); @@ -439,5 +322,43 @@ namespace ImageSharp.Formats _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//15 } -} + + + internal static unsafe void CopyColorsTo(ref Block8x8F block, MutableSpan buffer, int stride) + { + fixed (Block8x8F* p = &block) + { + float* b = (float*)p; + + for (int y = 0; y < 8; y++) + { + int y8 = y * 8; + int yStride = y * stride; + + for (int x = 0; x < 8; x++) + { + float c = b[y8 + x]; + + if (c < -128) + { + c = 0; + } + else if (c > 127) + { + c = 255; + } + else + { + c += 128; + } + + buffer[yStride + x] = (byte)c; + } + } + } + + + } + + } } \ No newline at end of file diff --git a/tests/ImageSharp.Tests46/Formats/Jpg/UtilityTestClassBase.cs b/tests/ImageSharp.Tests46/Formats/Jpg/UtilityTestClassBase.cs index a8dd2d7fa..55e609a52 100644 --- a/tests/ImageSharp.Tests46/Formats/Jpg/UtilityTestClassBase.cs +++ b/tests/ImageSharp.Tests46/Formats/Jpg/UtilityTestClassBase.cs @@ -30,6 +30,9 @@ namespace ImageSharp.Tests.Formats.Jpg return result; } + + + // ReSharper disable once InconsistentNaming public static int[] Create8x8IntData() { @@ -44,7 +47,7 @@ namespace ImageSharp.Tests.Formats.Jpg return result; } - internal void Print8x8Data(Span data) => Print8x8Data(data.Data); + internal void Print8x8Data(MutableSpan data) => Print8x8Data(data.Data); internal void Print8x8Data(T[] data) { @@ -61,7 +64,9 @@ namespace ImageSharp.Tests.Formats.Jpg Output.WriteLine(bld.ToString()); } - internal void PrintLinearData(Span data, int count = -1) + internal void PrintLinearData(T[] data) => PrintLinearData(new MutableSpan(data), data.Length); + + internal void PrintLinearData(MutableSpan data, int count = -1) { if (count < 0) count = data.TotalCount; diff --git a/tests/ImageSharp.Tests46/ImageSharp.Tests46.csproj b/tests/ImageSharp.Tests46/ImageSharp.Tests46.csproj index ad042f7e0..b22725c7e 100644 --- a/tests/ImageSharp.Tests46/ImageSharp.Tests46.csproj +++ b/tests/ImageSharp.Tests46/ImageSharp.Tests46.csproj @@ -78,10 +78,10 @@ - + - +