diff --git a/src/ImageSharp/Formats/Jpg/Components/Block.cs b/src/ImageSharp/Formats/Jpg/Components/Block.cs index f41e615b5..111a1cac5 100644 --- a/src/ImageSharp/Formats/Jpg/Components/Block.cs +++ b/src/ImageSharp/Formats/Jpg/Components/Block.cs @@ -12,9 +12,9 @@ namespace ImageSharp.Formats /// /// Represents an 8x8 block of coefficients to transform and encode. /// - public struct Block : IDisposable + internal struct Block : IDisposable { - private static ArrayPool IntArrayPool = ArrayPool.Create(BlockSize, 50); + private static readonly ArrayPool ArrayPool = ArrayPool.Create(BlockSize, 50); /// /// Gets the size of the block. @@ -37,7 +37,7 @@ namespace ImageSharp.Formats public void Init() { //this.Data = new int[BlockSize]; - this.Data = IntArrayPool.Rent(BlockSize); + this.Data = ArrayPool.Rent(BlockSize); } public static Block Create() @@ -79,7 +79,7 @@ namespace ImageSharp.Formats { if (Data != null) { - IntArrayPool.Return(Data, true); + ArrayPool.Return(Data, true); Data = null; } } @@ -108,4 +108,109 @@ namespace ImageSharp.Formats return clone; } } + + /// + /// Temporal class to make refactoring easier. + /// 1. Refactor Block -> BlockF + /// 2. Test + /// 3. Refactor BlockF -> Block8x8F + /// + internal struct BlockF : IDisposable + { + private static readonly ArrayPool ArrayPool = ArrayPool.Create(BlockSize, 50); + + /// + /// Gets the size of the block. + /// + public const int BlockSize = 64; + + /// + /// The array of block data. + /// + public float[] Data; + + /// + /// Initializes a new instance of the class. 
+ /// + //public Block() + //{ + // this.data = new int[BlockSize]; + //} + + public void Init() + { + //this.Data = new int[BlockSize]; + this.Data = ArrayPool.Rent(BlockSize); + } + + public static BlockF Create() + { + var block = new BlockF(); + block.Init(); + return block; + } + + public static BlockF[] CreateArray(int size) + { + BlockF[] result = new BlockF[size]; + for (int i = 0; i < result.Length; i++) + { + result[i].Init(); + } + return result; + } + + public bool IsInitialized => this.Data != null; + + /// + /// Gets the pixel data at the given block index. + /// + /// The index of the data to return. + /// + /// The . + /// + public float this[int index] + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get { return this.Data[index]; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + set { this.Data[index] = value; } + } + + // TODO: Refactor Block.Dispose() callers to always use 'using' or 'finally' statement! + public void Dispose() + { + if (Data != null) + { + ArrayPool.Return(Data, true); + Data = null; + } + } + + public static void DisposeAll(BlockF[] blocks) + { + for (int i = 0; i < blocks.Length; i++) + { + blocks[i].Dispose(); + } + } + + + public void Clear() + { + for (int i = 0; i < Data.Length; i++) + { + Data[i] = 0; + } + } + + public BlockF Clone() + { + BlockF clone = Create(); + Array.Copy(Data, clone.Data, BlockSize); + return clone; + } + } + + } diff --git a/src/ImageSharp/Formats/Jpg/Components/Block8x8F.Generated.cs b/src/ImageSharp/Formats/Jpg/Components/Block8x8F.Generated.cs new file mode 100644 index 000000000..2fec402d6 --- /dev/null +++ b/src/ImageSharp/Formats/Jpg/Components/Block8x8F.Generated.cs @@ -0,0 +1,55 @@ + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; + + +namespace ImageSharp.Formats +{ + internal partial struct Block8x8F + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void TransposeInto(ref Block8x8F d) + { + d.V0L.X = V0L.X; d.V1L.X = V0L.Y; 
d.V2L.X = V0L.Z; d.V3L.X = V0L.W; d.V4L.X = V0R.X; d.V5L.X = V0R.Y; d.V6L.X = V0R.Z; d.V7L.X = V0R.W; + d.V0L.Y = V1L.X; d.V1L.Y = V1L.Y; d.V2L.Y = V1L.Z; d.V3L.Y = V1L.W; d.V4L.Y = V1R.X; d.V5L.Y = V1R.Y; d.V6L.Y = V1R.Z; d.V7L.Y = V1R.W; + d.V0L.Z = V2L.X; d.V1L.Z = V2L.Y; d.V2L.Z = V2L.Z; d.V3L.Z = V2L.W; d.V4L.Z = V2R.X; d.V5L.Z = V2R.Y; d.V6L.Z = V2R.Z; d.V7L.Z = V2R.W; + d.V0L.W = V3L.X; d.V1L.W = V3L.Y; d.V2L.W = V3L.Z; d.V3L.W = V3L.W; d.V4L.W = V3R.X; d.V5L.W = V3R.Y; d.V6L.W = V3R.Z; d.V7L.W = V3R.W; + d.V0R.X = V4L.X; d.V1R.X = V4L.Y; d.V2R.X = V4L.Z; d.V3R.X = V4L.W; d.V4R.X = V4R.X; d.V5R.X = V4R.Y; d.V6R.X = V4R.Z; d.V7R.X = V4R.W; + d.V0R.Y = V5L.X; d.V1R.Y = V5L.Y; d.V2R.Y = V5L.Z; d.V3R.Y = V5L.W; d.V4R.Y = V5R.X; d.V5R.Y = V5R.Y; d.V6R.Y = V5R.Z; d.V7R.Y = V5R.W; + d.V0R.Z = V6L.X; d.V1R.Z = V6L.Y; d.V2R.Z = V6L.Z; d.V3R.Z = V6L.W; d.V4R.Z = V6R.X; d.V5R.Z = V6R.Y; d.V6R.Z = V6R.Z; d.V7R.Z = V6R.W; + d.V0R.W = V7L.X; d.V1R.W = V7L.Y; d.V2R.W = V7L.Z; d.V3R.W = V7L.W; d.V4R.W = V7R.X; d.V5R.W = V7R.Y; d.V6R.W = V7R.Z; d.V7R.W = V7R.W; + } + + + public void CropInto(float min, float max, ref Block8x8F d) + { + Vector4 minVec = new Vector4(min); + Vector4 maxVec = new Vector4(max); + + d.V0L = Vector4.Max(Vector4.Min(V0L, maxVec), minVec);d.V0R = Vector4.Max(Vector4.Min(V0R, maxVec), minVec); + d.V1L = Vector4.Max(Vector4.Min(V1L, maxVec), minVec);d.V1R = Vector4.Max(Vector4.Min(V1R, maxVec), minVec); + d.V2L = Vector4.Max(Vector4.Min(V2L, maxVec), minVec);d.V2R = Vector4.Max(Vector4.Min(V2R, maxVec), minVec); + d.V3L = Vector4.Max(Vector4.Min(V3L, maxVec), minVec);d.V3R = Vector4.Max(Vector4.Min(V3R, maxVec), minVec); + d.V4L = Vector4.Max(Vector4.Min(V4L, maxVec), minVec);d.V4R = Vector4.Max(Vector4.Min(V4R, maxVec), minVec); + d.V5L = Vector4.Max(Vector4.Min(V5L, maxVec), minVec);d.V5R = Vector4.Max(Vector4.Min(V5R, maxVec), minVec); + d.V6L = Vector4.Max(Vector4.Min(V6L, maxVec), minVec);d.V6R = Vector4.Max(Vector4.Min(V6R, maxVec), minVec); + 
d.V7L = Vector4.Max(Vector4.Min(V7L, maxVec), minVec);d.V7R = Vector4.Max(Vector4.Min(V7R, maxVec), minVec); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void ColorifyInto(ref Block8x8F d) + { + d.V0L = Vector4.Max(Vector4.Min(V0L, CMax4), CMin4) + COff4;d.V0R = Vector4.Max(Vector4.Min(V0R, CMax4), CMin4) + COff4; + d.V1L = Vector4.Max(Vector4.Min(V1L, CMax4), CMin4) + COff4;d.V1R = Vector4.Max(Vector4.Min(V1R, CMax4), CMin4) + COff4; + d.V2L = Vector4.Max(Vector4.Min(V2L, CMax4), CMin4) + COff4;d.V2R = Vector4.Max(Vector4.Min(V2R, CMax4), CMin4) + COff4; + d.V3L = Vector4.Max(Vector4.Min(V3L, CMax4), CMin4) + COff4;d.V3R = Vector4.Max(Vector4.Min(V3R, CMax4), CMin4) + COff4; + d.V4L = Vector4.Max(Vector4.Min(V4L, CMax4), CMin4) + COff4;d.V4R = Vector4.Max(Vector4.Min(V4R, CMax4), CMin4) + COff4; + d.V5L = Vector4.Max(Vector4.Min(V5L, CMax4), CMin4) + COff4;d.V5R = Vector4.Max(Vector4.Min(V5R, CMax4), CMin4) + COff4; + d.V6L = Vector4.Max(Vector4.Min(V6L, CMax4), CMin4) + COff4;d.V6R = Vector4.Max(Vector4.Min(V6R, CMax4), CMin4) + COff4; + d.V7L = Vector4.Max(Vector4.Min(V7L, CMax4), CMin4) + COff4;d.V7R = Vector4.Max(Vector4.Min(V7R, CMax4), CMin4) + COff4; + } + + + } +} diff --git a/src/ImageSharp/Formats/Jpg/Components/Block8x8F.Generated.tt b/src/ImageSharp/Formats/Jpg/Components/Block8x8F.Generated.tt new file mode 100644 index 000000000..810c4e904 --- /dev/null +++ b/src/ImageSharp/Formats/Jpg/Components/Block8x8F.Generated.tt @@ -0,0 +1,93 @@ +<#@ template debug="false" hostspecific="false" language="C#" #> +<#@ assembly name="System.Core" #> +<#@ import namespace="System.Linq" #> +<#@ import namespace="System.Text" #> +<#@ import namespace="System.Collections.Generic" #> +<#@ output extension=".cs" #> + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; + +<# +char[] coordz = new[] {'X', 'Y', 'Z', 'W'}; +#> + +namespace ImageSharp.Formats +{ + internal partial struct Block8x8F + { + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + public void TransposeInto(ref Block8x8F d) + { + <# + PushIndent(" "); + + for (int i = 0; i < 8; i++) + { + char destCoord = coordz[i % 4]; + char destSide = (i / 4) % 2 == 0 ? 'L' : 'R'; + + for (int j = 0; j < 8; j++) + { + char srcCoord = coordz[j % 4]; + char srcSide = (j / 4) % 2 == 0 ? 'L' : 'R'; + + string expression = $"d.V{j}{destSide}.{destCoord} = V{i}{srcSide}.{srcCoord}; "; + //bld.Append(expression); + Write(expression); + } + //bld.AppendLine(); + WriteLine(""); + } + PopIndent(); + //Write(bld.ToString()); + #> + } + + + public void CropInto(float min, float max, ref Block8x8F d) + { + Vector4 minVec = new Vector4(min); + Vector4 maxVec = new Vector4(max); + + <# + + PushIndent(" "); + + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 2; j++) + { + char side = j == 0 ? 'L' : 'R'; + Write($"d.V{i}{side} = Vector4.Max(Vector4.Min(V{i}{side}, maxVec), minVec);"); + } + WriteLine(""); + } + PopIndent(); + #> + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void ColorifyInto(ref Block8x8F d) + { + <# + + PushIndent(" "); + + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 2; j++) + { + char side = j == 0 ? 
'L' : 'R'; + Write($"d.V{i}{side} = Vector4.Max(Vector4.Min(V{i}{side}, CMax4), CMin4) + COff4;"); + } + WriteLine(""); + } + PopIndent(); + #> + } + + + } +} diff --git a/src/ImageSharp/Formats/Jpg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpg/Components/Block8x8F.cs new file mode 100644 index 000000000..02fe79f17 --- /dev/null +++ b/src/ImageSharp/Formats/Jpg/Components/Block8x8F.cs @@ -0,0 +1,617 @@ +using System; +using System.Buffers; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// ReSharper disable InconsistentNaming + +namespace ImageSharp.Formats +{ + /// + /// DCT code Ported from https://github.com/norishigefukushima/dct_simd + /// + internal partial struct Block8x8F + { + public Vector4 V0L; + public Vector4 V0R; + + public Vector4 V1L; + public Vector4 V1R; + + public Vector4 V2L; + public Vector4 V2R; + + public Vector4 V3L; + public Vector4 V3R; + + public Vector4 V4L; + public Vector4 V4R; + + public Vector4 V5L; + public Vector4 V5R; + + public Vector4 V6L; + public Vector4 V6R; + + public Vector4 V7L; + public Vector4 V7R; + + + public const int VectorCount = 16; + public const int ScalarCount = VectorCount*4; + + private static readonly ArrayPool ScalarArrayPool = ArrayPool.Create(ScalarCount, 50); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void LoadFrom(MutableSpan source) + { + fixed (Vector4* ptr = &V0L) + { + Marshal.Copy(source.Data, source.Offset, (IntPtr) ptr, ScalarCount); + //float* fp = (float*)ptr; + //for (int i = 0; i < ScalarCount; i++) + //{ + // fp[i] = source[i]; + //} + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void CopyTo(MutableSpan dest) + { + fixed (Vector4* ptr = &V0L) + { + Marshal.Copy((IntPtr) ptr, dest.Data, dest.Offset, ScalarCount); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void CopyTo(float[] dest) + { + fixed (Vector4* ptr = &V0L) + { + 
Marshal.Copy((IntPtr) ptr, dest, 0, ScalarCount); + } + } + + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static unsafe void LoadFrom(Block8x8F* blockPtr, MutableSpan source) + { + Marshal.Copy(source.Data, source.Offset, (IntPtr) blockPtr, ScalarCount); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static unsafe void CopyTo(Block8x8F* blockPtr, MutableSpan dest) + { + Marshal.Copy((IntPtr) blockPtr, dest.Data, dest.Offset, ScalarCount); + } + + + internal unsafe void LoadFrom(MutableSpan source) + { + fixed (Vector4* ptr = &V0L) + { + float* fp = (float*) ptr; + for (int i = 0; i < ScalarCount; i++) + { + fp[i] = source[i]; + } + } + } + + internal unsafe void CopyTo(MutableSpan dest) + { + fixed (Vector4* ptr = &V0L) + { + float* fp = (float*) ptr; + for (int i = 0; i < ScalarCount; i++) + { + dest[i] = (int) fp[i]; + } + } + } + + public unsafe void TransposeInplace() + { + fixed (Vector4* ptr = &V0L) + { + float* data = (float*) ptr; + + for (int i = 1; i < 8; i++) + { + int i8 = i*8; + for (int j = 0; j < i; j++) + { + float tmp = data[i8 + j]; + data[i8 + j] = data[j*8 + i]; + data[j*8 + i] = tmp; + } + } + } + + } + + /// + /// Reference implementation we can benchmark against + /// + internal unsafe void TransposeInto_PinningImpl(ref Block8x8F destination) + { + fixed (Vector4* sPtr = &V0L) + { + float* src = (float*) sPtr; + + fixed (Vector4* dPtr = &destination.V0L) + { + float* dest = (float*) dPtr; + + for (int i = 0; i < 8; i++) + { + int i8 = i*8; + for (int j = 0; j < 8; j++) + { + dest[j*8 + i] = src[i8 + j]; + } + } + } + } + } + + + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static unsafe void TransposeInto(Block8x8F* sourcePtr, Block8x8F* destPtr) + { + float* src = (float*) sourcePtr; + float* dest = (float*) destPtr; + + for (int i = 0; i < 8; i++) + { + int i8 = i*8; + for (int j = 0; j < 8; j++) + { + dest[j*8 + i] = src[i8 + j]; + } + } + } + + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + public void MultiplyAllInplace(Vector4 s) + { + V0L *= s; + V0R *= s; + V1L *= s; + V1R *= s; + V2L *= s; + V2R *= s; + V3L *= s; + V3R *= s; + V4L *= s; + V4R *= s; + V5L *= s; + V5R *= s; + V6L *= s; + V6R *= s; + V7L *= s; + V7R *= s; + } + + // ReSharper disable once InconsistentNaming + public void IDCTInto(ref Block8x8F dest, ref Block8x8F temp) + { + TransposeInto(ref temp); + temp.iDCT2D8x4_LeftPart(ref dest); + temp.iDCT2D8x4_RightPart(ref dest); + + dest.TransposeInto(ref temp); + + temp.iDCT2D8x4_LeftPart(ref dest); + temp.iDCT2D8x4_RightPart(ref dest); + + dest.MultiplyAllInplace(_0_125); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void IDCTInplace() + { + Block8x8F result = new Block8x8F(); + Block8x8F temp = new Block8x8F(); + IDCTInto(ref result, ref temp); + this = result; + } + + private static readonly Vector4 _1_175876 = new Vector4(1.175876f); + private static readonly Vector4 _1_961571 = new Vector4(-1.961571f); + private static readonly Vector4 _0_390181 = new Vector4(-0.390181f); + private static readonly Vector4 _0_899976 = new Vector4(-0.899976f); + private static readonly Vector4 _2_562915 = new Vector4(-2.562915f); + private static readonly Vector4 _0_298631 = new Vector4(0.298631f); + private static readonly Vector4 _2_053120 = new Vector4(2.053120f); + private static readonly Vector4 _3_072711 = new Vector4(3.072711f); + private static readonly Vector4 _1_501321 = new Vector4(1.501321f); + private static readonly Vector4 _0_541196 = new Vector4(0.541196f); + private static readonly Vector4 _1_847759 = new Vector4(-1.847759f); + private static readonly Vector4 _0_765367 = new Vector4(0.765367f); + private static readonly Vector4 _0_125 = new Vector4(0.1250f); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void iDCT2D8x4_LeftPart(ref Block8x8F d) + { + /* + float a0,a1,a2,a3,b0,b1,b2,b3; float z0,z1,z2,z3,z4; float r[8]; int i; + for(i = 0;i < 
8;i++){ r[i] = (float)(cos((double)i / 16.0 * M_PI) * M_SQRT2); } + */ + /* + 0: 1.414214 + 1: 1.387040 + 2: 1.306563 + 3: + 4: 1.000000 + 5: 0.785695 + 6: + 7: 0.275899 + */ + + Vector4 my1 = V1L; + Vector4 my7 = V7L; + Vector4 mz0 = my1 + my7; + + Vector4 my3 = V3L; + Vector4 mz2 = my3 + my7; + Vector4 my5 = V5L; + Vector4 mz1 = my3 + my5; + Vector4 mz3 = my1 + my5; + + Vector4 mz4 = ((mz0 + mz1)*_1_175876); + //z0 = y[1] + y[7]; z1 = y[3] + y[5]; z2 = y[3] + y[7]; z3 = y[1] + y[5]; + //z4 = (z0 + z1) * r[3]; + + mz2 = mz2*_1_961571 + mz4; + mz3 = mz3*_0_390181 + mz4; + mz0 = mz0*_0_899976; + mz1 = mz1*_2_562915; + + /* + -0.899976 + -2.562915 + -1.961571 + -0.390181 + z0 = z0 * (-r[3] + r[7]); + z1 = z1 * (-r[3] - r[1]); + z2 = z2 * (-r[3] - r[5]) + z4; + z3 = z3 * (-r[3] + r[5]) + z4;*/ + + + Vector4 mb3 = my7*_0_298631 + mz0 + mz2; + Vector4 mb2 = my5*_2_053120 + mz1 + mz3; + Vector4 mb1 = my3*_3_072711 + mz1 + mz2; + Vector4 mb0 = my1*_1_501321 + mz0 + mz3; + + /* + 0.298631 + 2.053120 + 3.072711 + 1.501321 + b3 = y[7] * (-r[1] + r[3] + r[5] - r[7]) + z0 + z2; + b2 = y[5] * ( r[1] + r[3] - r[5] + r[7]) + z1 + z3; + b1 = y[3] * ( r[1] + r[3] + r[5] - r[7]) + z1 + z2; + b0 = y[1] * ( r[1] + r[3] - r[5] - r[7]) + z0 + z3; + */ + + Vector4 my2 = V2L; + Vector4 my6 = V6L; + mz4 = (my2 + my6)*_0_541196; + Vector4 my0 = V0L; + Vector4 my4 = V4L; + mz0 = my0 + my4; + mz1 = my0 - my4; + + mz2 = mz4 + my6*_1_847759; + mz3 = mz4 + my2*_0_765367; + + my0 = mz0 + mz3; + my3 = mz0 - mz3; + my1 = mz1 + mz2; + my2 = mz1 - mz2; + /* + 1.847759 + 0.765367 + z4 = (y[2] + y[6]) * r[6]; + z0 = y[0] + y[4]; z1 = y[0] - y[4]; + z2 = z4 - y[6] * (r[2] + r[6]); + z3 = z4 + y[2] * (r[2] - r[6]); + a0 = z0 + z3; a3 = z0 - z3; + a1 = z1 + z2; a2 = z1 - z2; + */ + + d.V0L = my0 + mb0; + d.V7L = my0 - mb0; + d.V1L = my1 + mb1; + d.V6L = my1 - mb1; + d.V2L = my2 + mb2; + d.V5L = my2 - mb2; + d.V3L = my3 + mb3; + d.V4L = my3 - mb3; + /* + x[0] = a0 + b0; x[7] = a0 - b0; + x[1] = a1 + b1; 
x[6] = a1 - b1; + x[2] = a2 + b2; x[5] = a2 - b2; + x[3] = a3 + b3; x[4] = a3 - b3; + for(i = 0;i < 8;i++){ x[i] *= 0.353554f; } + */ + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void iDCT2D8x4_RightPart(ref Block8x8F d) + { + /* + float a0,a1,a2,a3,b0,b1,b2,b3; float z0,z1,z2,z3,z4; float r[8]; int i; + for(i = 0;i < 8;i++){ r[i] = (float)(cos((double)i / 16.0 * M_PI) * M_SQRT2); } + */ + /* + 0: 1.414214 + 1: 1.387040 + 2: 1.306563 + 3: + 4: 1.000000 + 5: 0.785695 + 6: + 7: 0.275899 + */ + + Vector4 my1 = V1R; + Vector4 my7 = V7R; + Vector4 mz0 = my1 + my7; + + Vector4 my3 = V3R; + Vector4 mz2 = my3 + my7; + Vector4 my5 = V5R; + Vector4 mz1 = my3 + my5; + Vector4 mz3 = my1 + my5; + + Vector4 mz4 = ((mz0 + mz1)*_1_175876); + //z0 = y[1] + y[7]; z1 = y[3] + y[5]; z2 = y[3] + y[7]; z3 = y[1] + y[5]; + //z4 = (z0 + z1) * r[3]; + + mz2 = mz2*_1_961571 + mz4; + mz3 = mz3*_0_390181 + mz4; + mz0 = mz0*_0_899976; + mz1 = mz1*_2_562915; + + /* + -0.899976 + -2.562915 + -1.961571 + -0.390181 + z0 = z0 * (-r[3] + r[7]); + z1 = z1 * (-r[3] - r[1]); + z2 = z2 * (-r[3] - r[5]) + z4; + z3 = z3 * (-r[3] + r[5]) + z4;*/ + + + Vector4 mb3 = my7*_0_298631 + mz0 + mz2; + Vector4 mb2 = my5*_2_053120 + mz1 + mz3; + Vector4 mb1 = my3*_3_072711 + mz1 + mz2; + Vector4 mb0 = my1*_1_501321 + mz0 + mz3; + + /* + 0.298631 + 2.053120 + 3.072711 + 1.501321 + b3 = y[7] * (-r[1] + r[3] + r[5] - r[7]) + z0 + z2; + b2 = y[5] * ( r[1] + r[3] - r[5] + r[7]) + z1 + z3; + b1 = y[3] * ( r[1] + r[3] + r[5] - r[7]) + z1 + z2; + b0 = y[1] * ( r[1] + r[3] - r[5] - r[7]) + z0 + z3; + */ + + Vector4 my2 = V2R; + Vector4 my6 = V6R; + mz4 = (my2 + my6)*_0_541196; + Vector4 my0 = V0R; + Vector4 my4 = V4R; + mz0 = my0 + my4; + mz1 = my0 - my4; + + mz2 = mz4 + my6*_1_847759; + mz3 = mz4 + my2*_0_765367; + + my0 = mz0 + mz3; + my3 = mz0 - mz3; + my1 = mz1 + mz2; + my2 = mz1 - mz2; + /* + 1.847759 + 0.765367 + z4 = (y[2] + y[6]) * r[6]; + z0 = y[0] + y[4]; z1 = y[0] - y[4]; + z2 = z4 - y[6] 
* (r[2] + r[6]); + z3 = z4 + y[2] * (r[2] - r[6]); + a0 = z0 + z3; a3 = z0 - z3; + a1 = z1 + z2; a2 = z1 - z2; + */ + + d.V0R = my0 + mb0; + d.V7R = my0 - mb0; + d.V1R = my1 + mb1; + d.V6R = my1 - mb1; + d.V2R = my2 + mb2; + d.V5R = my2 - mb2; + d.V3R = my3 + mb3; + d.V4R = my3 - mb3; + /* + x[0] = a0 + b0; x[7] = a0 - b0; + x[1] = a1 + b1; x[6] = a1 - b1; + x[2] = a2 + b2; x[5] = a2 - b2; + x[3] = a3 + b3; x[4] = a3 - b3; + for(i = 0;i < 8;i++){ x[i] *= 0.353554f; } + */ + } + + internal static void SuchIDCT(ref Block block) + { + Block8x8F source = new Block8x8F(); + source.LoadFrom(block.Data); + + Block8x8F dest = new Block8x8F(); + Block8x8F temp = new Block8x8F(); + + source.IDCTInto(ref dest, ref temp); + dest.CopyTo(block.Data); + } + + internal static void SuchIDCT(ref BlockF block) + { + Block8x8F source = new Block8x8F(); + source.LoadFrom(block.Data); + + Block8x8F dest = new Block8x8F(); + Block8x8F temp = new Block8x8F(); + + source.IDCTInto(ref dest, ref temp); + dest.CopyTo(block.Data); + } + + public unsafe float this[int idx] + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get + { + fixed (Block8x8F* p = &this) + { + float* fp = (float*) p; + return fp[idx]; + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + set + { + fixed (Block8x8F* p = &this) + { + float* fp = (float*) p; + fp[idx] = value; + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static unsafe float GetScalarAt(Block8x8F* blockPtr, int idx) + { + float* fp = (float*) blockPtr; + return fp[idx]; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static unsafe void SetScalarAt(Block8x8F* blockPtr, int idx, float value) + { + float* fp = (float*) blockPtr; + fp[idx] = value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void Clear() + { + this = new Block8x8F(); // LOL C# Plz! 
+ } + + internal void LoadFrom(ref BlockF legacyBlock) + { + LoadFrom(legacyBlock.Data); + } + + internal void CopyTo(ref BlockF legacyBlock) + { + CopyTo(legacyBlock.Data); + } + + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static byte ToColorByte(float c) + { + if (c < -128) + { + return 0; + } + else if (c > 127) + { + return 255; + } + else + { + c += 128; + return (byte) c; + } + } + + + + internal unsafe void CopyColorsTo(MutableSpan buffer, int stride) + { + fixed (Block8x8F* p = &this) + { + float* b = (float*) p; + + for (int y = 0; y < 8; y++) + { + int y8 = y*8; + int yStride = y*stride; + + for (int x = 0; x < 8; x++) + { + float c = b[y8 + x]; + + if (c < -128) + { + c = 0; + } + else if (c > 127) + { + c = 255; + } + else + { + c += 128; + } + + buffer[yStride + x] = (byte) c; + } + } + } + + } + + private static readonly Vector4 CMin4 = new Vector4(-128f); + private static readonly Vector4 CMax4 = new Vector4(127f); + private static readonly Vector4 COff4 = new Vector4(128f); + + /// + /// Level shift by +128, clip to [0, 255], and write to buffer. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal unsafe void CopyColorsTo( + MutableSpan buffer, + int stride, + Block8x8F* temp) + { + ColorifyInto(ref *temp); + + float* src = (float*) temp; + for (int i = 0; i < 8; i++) + { + buffer[0] = (byte) src[0]; + buffer[1] = (byte) src[1]; + buffer[2] = (byte) src[2]; + buffer[3] = (byte) src[3]; + buffer[4] = (byte) src[4]; + buffer[5] = (byte) src[5]; + buffer[6] = (byte) src[6]; + buffer[7] = (byte) src[7]; + buffer.AddOffset(stride); + src += 8; + } + } + + + } +} \ No newline at end of file diff --git a/src/ImageSharp/Formats/Jpg/Components/MutableSpan.cs b/src/ImageSharp/Formats/Jpg/Components/MutableSpan.cs new file mode 100644 index 000000000..0cb11690b --- /dev/null +++ b/src/ImageSharp/Formats/Jpg/Components/MutableSpan.cs @@ -0,0 +1,95 @@ +using System.Buffers; +using System.Numerics; +using System.Runtime.CompilerServices; + +namespace ImageSharp.Formats +{ + /// + /// Like corefxlab Span, but with an AddOffset() method for efficiency. + /// TODO: When Span will be official, consider replacing this class! 
+ /// + /// + /// + internal struct MutableSpan + { + public T[] Data; + public int Offset; + + public int TotalCount => Data.Length - Offset; + + public MutableSpan(int size, int offset = 0) + { + Data = new T[size]; + Offset = offset; + } + + public MutableSpan(T[] data, int offset = 0) + { + Data = data; + Offset = offset; + } + + public T this[int idx] + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] get { return Data[idx + Offset]; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] set { Data[idx + Offset] = value; } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public MutableSpan Slice(int offset) + { + return new MutableSpan(Data, Offset + offset); + } + + public static implicit operator MutableSpan(T[] data) => new MutableSpan(data, 0); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void AddOffset(int offset) + { + Offset += offset; + } + } + + internal static class MutableSpanExtensions + { + public static MutableSpan Slice(this T[] array, int offset) => new MutableSpan(array, offset); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void SaveTo(this MutableSpan data, ref Vector4 v) + { + v.X = data[0]; + v.Y = data[1]; + v.Z = data[2]; + v.W = data[3]; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void SaveTo(this MutableSpan data, ref Vector4 v) + { + v.X = data[0]; + v.Y = data[1]; + v.Z = data[2]; + v.W = data[3]; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void LoadFrom(this MutableSpan data, ref Vector4 v) + { + data[0] = v.X; + data[1] = v.Y; + data[2] = v.Z; + data[3] = v.W; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void LoadFrom(this MutableSpan data, ref Vector4 v) + { + data[0] = (int)v.X; + data[1] = (int)v.Y; + data[2] = (int)v.Z; + data[3] = (int)v.W; + } + + + } +} \ No newline at end of file diff --git a/src/ImageSharp/Formats/Jpg/JpegDecoderCore.cs 
b/src/ImageSharp/Formats/Jpg/JpegDecoderCore.cs index 4a7726be4..a2a570aed 100644 --- a/src/ImageSharp/Formats/Jpg/JpegDecoderCore.cs +++ b/src/ImageSharp/Formats/Jpg/JpegDecoderCore.cs @@ -3,6 +3,7 @@ // Licensed under the Apache License, Version 2.0. // +using System.Diagnostics; using System.Runtime.CompilerServices; namespace ImageSharp.Formats @@ -14,7 +15,7 @@ namespace ImageSharp.Formats /// /// Performs the jpeg decoding operation. /// - internal class JpegDecoderCore : IDisposable + internal unsafe class JpegDecoderCore : IDisposable { /// /// The maximum (inclusive) number of bits in a Huffman code. @@ -84,7 +85,7 @@ namespace ImageSharp.Formats /// /// Saved state between progressive-mode scans. /// - private readonly Block[][] progCoeffs; + private readonly Block8x8F[][] progCoeffs; /// /// The huffman trees @@ -96,7 +97,7 @@ namespace ImageSharp.Formats /// /// Quantization tables, in zigzag order. /// - private readonly Block[] quantizationTables; + private readonly Block8x8F[] quantizationTables; /// /// A temporary buffer for holding pixels @@ -201,12 +202,12 @@ namespace ImageSharp.Formats public JpegDecoderCore() { //this.huffmanTrees = new Huffman[MaxTc + 1, MaxTh + 1]; - this.huffmanTrees = new Huffman[(MaxTc + 1)*(MaxTh + 1)]; + this.huffmanTrees = new Huffman[(MaxTc + 1) * (MaxTh + 1)]; - this.quantizationTables = Block.CreateArray(MaxTq + 1); - this.temp = new byte[2 * Block.BlockSize]; + this.quantizationTables = new Block8x8F[MaxTq + 1]; + this.temp = new byte[2 * BlockF.BlockSize]; this.componentArray = new Component[MaxComponents]; - this.progCoeffs = new Block[MaxComponents][]; + this.progCoeffs = new Block8x8F[MaxComponents][]; this.bits = new Bits(); this.bytes = new Bytes(); @@ -216,21 +217,9 @@ namespace ImageSharp.Formats { for (int j = 0; j < MaxTh + 1; j++) { - //this.huffmanTrees[i, j].Init(LutSize, MaxNCodes, MaxCodeLength); - this.huffmanTrees[i* ThRowSize + j].Init(LutSize, MaxNCodes, MaxCodeLength); + this.huffmanTrees[i * 
ThRowSize + j].Init(LutSize, MaxNCodes, MaxCodeLength); } } - - //for (int i = 0; i < this.quantizationTables.Length; i++) - //{ - // //this.quantizationTables[i] = new Block(); - // this.quantizationTables[i].Init(); - //} - - //for (int i = 0; i < this.componentArray.Length; i++) - //{ - // this.componentArray[i] = new Component(); - //} } @@ -515,7 +504,7 @@ namespace ImageSharp.Formats throw new ImageFormatException("Bad Th value"); } - ProcessDefineHuffmanTablesMarkerLoop(ref this.huffmanTrees[tc* ThRowSize + th], ref remaining); + ProcessDefineHuffmanTablesMarkerLoop(ref this.huffmanTrees[tc * ThRowSize + th], ref remaining); } } @@ -571,8 +560,8 @@ namespace ImageSharp.Formats // whose codeLength's high bits matches code. // The high 8 bits of lutValue are the encoded value. // The low 8 bits are 1 plus the codeLength. - byte base2 = (byte) (code << (7 - i)); - ushort lutValue = (ushort) ((huffman.Values[x] << 8) | (2 + i)); + byte base2 = (byte)(code << (7 - i)); + ushort lutValue = (ushort)((huffman.Values[x] << 8) | (2 + i)); for (int k = 0; k < 1 << (7 - i); k++) { @@ -1117,32 +1106,32 @@ namespace ImageSharp.Formats switch (x >> 4) { case 0: - if (remaining < Block.BlockSize) + if (remaining < BlockF.BlockSize) { done = true; break; } - remaining -= Block.BlockSize; - this.ReadFull(this.temp, 0, Block.BlockSize); + remaining -= BlockF.BlockSize; + this.ReadFull(this.temp, 0, BlockF.BlockSize); - for (int i = 0; i < Block.BlockSize; i++) + for (int i = 0; i < BlockF.BlockSize; i++) { this.quantizationTables[tq][i] = this.temp[i]; } break; case 1: - if (remaining < 2 * Block.BlockSize) + if (remaining < 2 * BlockF.BlockSize) { done = true; break; } - remaining -= 2 * Block.BlockSize; - this.ReadFull(this.temp, 0, 2 * Block.BlockSize); + remaining -= 2 * BlockF.BlockSize; + this.ReadFull(this.temp, 0, 2 * BlockF.BlockSize); - for (int i = 0; i < Block.BlockSize; i++) + for (int i = 0; i < BlockF.BlockSize; i++) { this.quantizationTables[tq][i] = 
(this.temp[2 * i] << 8) | this.temp[(2 * i) + 1]; } @@ -1471,7 +1460,7 @@ namespace ImageSharp.Formats } } - private Block scanWorkerBlock = Block.Create(); + private BlockF scanWorkerBlock = BlockF.Create(); /// /// Processes the SOS (Start of scan marker). @@ -1535,7 +1524,7 @@ namespace ImageSharp.Formats // significant bit. // For baseline JPEGs, these parameters are hard-coded to 0/63/0/0. int zigStart = 0; - int zigEnd = Block.BlockSize - 1; + int zigEnd = BlockF.BlockSize - 1; int ah = 0; int al = 0; @@ -1546,7 +1535,7 @@ namespace ImageSharp.Formats ah = this.temp[3 + scanComponentCountX2] >> 4; al = this.temp[3 + scanComponentCountX2] & 0x0f; - if ((zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || Block.BlockSize <= zigEnd) + if ((zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || BlockF.BlockSize <= zigEnd) { throw new ImageFormatException("Bad spectral selection bounds"); } @@ -1580,12 +1569,9 @@ namespace ImageSharp.Formats int compIndex = scan[i].Index; if (this.progCoeffs[compIndex] == null) { - this.progCoeffs[compIndex] = Block.CreateArray(mxx * myy * this.componentArray[compIndex].HorizontalFactor * this.componentArray[compIndex].VerticalFactor); + var size = mxx * myy * this.componentArray[compIndex].HorizontalFactor * this.componentArray[compIndex].VerticalFactor; - for (int j = 0; j < this.progCoeffs[compIndex].Length; j++) - { - this.progCoeffs[compIndex][j].Init(); - } + this.progCoeffs[compIndex] = new Block8x8F[size]; } } } @@ -1603,6 +1589,10 @@ namespace ImageSharp.Formats // blocks: the third block in the first row has (bx, by) = (2, 0). 
int bx, by, blockCount = 0; + Block8x8F b = new Block8x8F(); + Block8x8F temp1 = new Block8x8F(); + Block8x8F temp2 = new Block8x8F(); + for (int my = 0; my < myy; my++) { for (int mx = 0; mx < mxx; mx++) @@ -1612,7 +1602,7 @@ namespace ImageSharp.Formats int compIndex = scan[i].Index; int hi = this.componentArray[compIndex].HorizontalFactor; int vi = this.componentArray[compIndex].VerticalFactor; - + for (int j = 0; j < hi * vi; j++) { @@ -1656,27 +1646,39 @@ namespace ImageSharp.Formats } var qtIndex = this.componentArray[compIndex].Selector; - - if (this.isProgressive) // Load the previous partially decoded coefficients, if applicable. - { - blockIndex = ((@by * mxx) * hi) + bx; - ProcessBlockImpl(ah, - ref this.progCoeffs[compIndex][blockIndex], - scan, i, zigStart, zigEnd, al, dc, compIndex, @by, mxx, hi, bx, - ref this.quantizationTables[qtIndex] - ); - } - else + + // TODO: Find a way to clean up this mess + fixed (Block8x8F* qtp = &this.quantizationTables[qtIndex]) { - //var b = Block.Create(); - scanWorkerBlock.Clear(); - - ProcessBlockImpl(ah, ref scanWorkerBlock, scan, i, zigStart, zigEnd, al, dc, compIndex, @by, mxx, hi, - bx, ref this.quantizationTables[qtIndex] - ); - - //b.Dispose(); + if (this.isProgressive) // Load the previous partially decoded coefficients, if applicable. 
+ { + blockIndex = ((@by * mxx) * hi) + bx; + + fixed (Block8x8F* bp = &this.progCoeffs[compIndex][blockIndex]) + { + ProcessBlockImpl(ah, + bp, + &temp1, + &temp2, + scan, i, zigStart, zigEnd, al, dc, compIndex, @by, mxx, hi, bx, + qtp + ); + } + } + else + { + b.Clear(); + ProcessBlockImpl(ah, + &b, + &temp1, + &temp2, + scan, i, zigStart, zigEnd, al, dc, compIndex, @by, mxx, hi, + bx, qtp + ); + } } + + } // for j @@ -1718,12 +1720,19 @@ namespace ImageSharp.Formats // for my } - private void ProcessBlockImpl(int ah, ref Block b, Scan[] scan, int i, int zigStart, int zigEnd, int al, - int[] dc, int compIndex, int @by, int mxx, int hi, int bx, ref Block qt) + private void ProcessBlockImpl( + int ah, + Block8x8F* b, + Block8x8F* temp1, + Block8x8F* temp2, + Scan[] scan, + int i, int zigStart, int zigEnd, int al, + int[] dc, int compIndex, int @by, int mxx, int hi, int bx, + Block8x8F* qt) { if (ah != 0) { - this.Refine(ref b, ref this.huffmanTrees[AcTable * ThRowSize + scan[i].AcTableSelector], zigStart, zigEnd, 1 << al); + this.Refine(b, ref this.huffmanTrees[AcTable * ThRowSize + scan[i].AcTableSelector], zigStart, zigEnd, 1 << al); } else { @@ -1741,7 +1750,9 @@ namespace ImageSharp.Formats int deltaDC = this.ReceiveExtend(value); dc[compIndex] += deltaDC; - b[0] = dc[compIndex] << al; + + //b[0] = dc[compIndex] << al; + Block8x8F.SetScalarAt(b, 0, dc[compIndex] << al); } if (zig <= zigEnd && this.eobRun > 0) @@ -1755,8 +1766,8 @@ namespace ImageSharp.Formats for (; zig <= zigEnd; zig++) { byte value = this.DecodeHuffman(ref this.huffmanTrees[AcTable * ThRowSize + scan[i].AcTableSelector]); - byte val0 = (byte) (value >> 4); - byte val1 = (byte) (value & 0x0f); + byte val0 = (byte)(value >> 4); + byte val1 = (byte)(value & 0x0f); if (val1 != 0) { zig += val0; @@ -1766,16 +1777,18 @@ namespace ImageSharp.Formats } int ac = this.ReceiveExtend(val1); - b[Unzig[zig]] = ac << al; + + //b[Unzig[zig]] = ac << al; + Block8x8F.SetScalarAt(b, Unzig[zig], ac << al); } 
else { if (val0 != 0x0f) { - this.eobRun = (ushort) (1 << val0); + this.eobRun = (ushort)(1 << val0); if (val0 != 0) { - this.eobRun |= (ushort) this.DecodeBits(val0); + this.eobRun |= (ushort)this.DecodeBits(val0); } this.eobRun--; @@ -1790,11 +1803,14 @@ namespace ImageSharp.Formats if (this.isProgressive) { - if (zigEnd != Block.BlockSize - 1 || al != 0) + if (zigEnd != BlockF.BlockSize - 1 || al != 0) { // We haven't completely decoded this 8x8 block. Save the coefficients. - - this.progCoeffs[compIndex][((@by*mxx)*hi) + bx] = b.Clone(); + + // TODO!!! + //throw new NotImplementedException(); + //this.progCoeffs[compIndex][((@by * mxx) * hi) + bx] = b.Clone(); + this.progCoeffs[compIndex][((@by * mxx) * hi) + bx] = *b; // At this point, we could execute the rest of the loop body to dequantize and // perform the inverse DCT, to save early stages of a progressive image to the @@ -1806,22 +1822,23 @@ namespace ImageSharp.Formats } // Dequantize, perform the inverse DCT and store the block to the image. - for (int zig = 0; zig < Block.BlockSize; zig++) + for (int zig = 0; zig < BlockF.BlockSize; zig++) { - b[Unzig[zig]] *= qt[zig]; + // TODO: We really need the fancy new corefxlab Span here ... 
+ //b[Unzig[zig]] *= qt[zig]; + + int unzigIdx = Unzig[zig]; + float value = Block8x8F.GetScalarAt(b, unzigIdx); + value *= Block8x8F.GetScalarAt(qt, zig); + Block8x8F.SetScalarAt(b, unzigIdx, value); } - IDCT.Transform(ref b); - - // ******* Other experimental variants: ************* - - // FluxJpeg: - // https://github.com/antonfirsov/ImageSharp/blob/master/src/ImageSharp46/Formats/Jpg/Components/FloatIDCT.cs - // FloatIDCT.Transform(ref b); - - // SIMD-based: - // https://github.com/antonfirsov/ImageSharp/blob/master/src/ImageSharp46/Formats/Jpg/Components/MagicDCT.cs - // MagicDCT.IDCT(ref b); + //IDCT.Transform(ref b); + //FloatIDCT.Transform(ref b); + //ReferenceDCT.IDCT(ref b); + //Block8x8F.SuchIDCT(ref b); + //b->IDCTInplace(); + b->IDCTInto(ref *temp1, ref *temp2); byte[] dst; int offset; @@ -1831,7 +1848,7 @@ namespace ImageSharp.Formats { dst = this.grayImage.Pixels; stride = this.grayImage.Stride; - offset = this.grayImage.Offset + (8*((@by*this.grayImage.Stride) + bx)); + offset = this.grayImage.Offset + (8 * ((@by * this.grayImage.Stride) + bx)); } else { @@ -1840,26 +1857,26 @@ namespace ImageSharp.Formats case 0: dst = this.ycbcrImage.YChannel; stride = this.ycbcrImage.YStride; - offset = this.ycbcrImage.YOffset + (8*((@by*this.ycbcrImage.YStride) + bx)); + offset = this.ycbcrImage.YOffset + (8 * ((@by * this.ycbcrImage.YStride) + bx)); break; case 1: dst = this.ycbcrImage.CbChannel; stride = this.ycbcrImage.CStride; - offset = this.ycbcrImage.COffset + (8*((@by*this.ycbcrImage.CStride) + bx)); + offset = this.ycbcrImage.COffset + (8 * ((@by * this.ycbcrImage.CStride) + bx)); break; case 2: dst = this.ycbcrImage.CrChannel; stride = this.ycbcrImage.CStride; - offset = this.ycbcrImage.COffset + (8*((@by*this.ycbcrImage.CStride) + bx)); + offset = this.ycbcrImage.COffset + (8 * ((@by * this.ycbcrImage.CStride) + bx)); break; case 3: dst = this.blackPixels; stride = this.blackStride; - offset = 8*((@by*this.blackStride) + bx); + offset = 8 * ((@by * 
this.blackStride) + bx); break; default: @@ -1868,32 +1885,12 @@ namespace ImageSharp.Formats } // Level shift by +128, clip to [0, 255], and write to dst. - for (int y = 0; y < 8; y++) - { - int y8 = y*8; - int yStride = y*stride; - for (int x = 0; x < 8; x++) - { - int c = b[y8 + x]; - if (c < -128) - { - c = 0; - } - else if (c > 127) - { - c = 255; - } - else - { - c += 128; - } - dst[yStride + x + offset] = (byte) c; - } - } + //temp1->CopyColorsPlz(new MutableSpan(dst, offset), stride); + temp1->CopyColorsTo(new MutableSpan(dst, offset), stride, temp2); } - + private void ProcessScanImpl(int i, ref Scan currentScan, Scan[] scan, ref int totalHv) { // Component selector. @@ -1934,15 +1931,15 @@ namespace ImageSharp.Formats } - totalHv += currentComponent.HorizontalFactor*currentComponent.VerticalFactor; + totalHv += currentComponent.HorizontalFactor * currentComponent.VerticalFactor; - currentScan.DcTableSelector = (byte) (this.temp[2 + (2*i)] >> 4); + currentScan.DcTableSelector = (byte)(this.temp[2 + (2 * i)] >> 4); if (currentScan.DcTableSelector > MaxTh) { throw new ImageFormatException("Bad DC table selector value"); } - currentScan.AcTableSelector = (byte) (this.temp[2 + (2*i)] & 0x0f); + currentScan.AcTableSelector = (byte)(this.temp[2 + (2 * i)] & 0x0f); if (currentScan.AcTableSelector > MaxTh) { throw new ImageFormatException("Bad AC table selector value"); @@ -1957,7 +1954,7 @@ namespace ImageSharp.Formats /// The zig-zag start index /// The zig-zag end index /// The low transform offset - private void Refine(ref Block b, ref Huffman h, int zigStart, int zigEnd, int delta) + private void Refine(Block8x8F* b, ref Huffman h, int zigStart, int zigEnd, int delta) { // Refining a DC component is trivial. 
if (zigStart == 0) @@ -1970,7 +1967,12 @@ namespace ImageSharp.Formats bool bit = this.DecodeBit(); if (bit) { - b[0] |= delta; + int stuff = (int) Block8x8F.GetScalarAt(b, 0); + + //int stuff = (int)b[0]; + stuff |= delta; + //b[0] = stuff; + Block8x8F.SetScalarAt(b, 0, stuff); } return; @@ -2021,6 +2023,8 @@ namespace ImageSharp.Formats break; } + int blah = zig; + zig = this.RefineNonZeroes(b, zig, zigEnd, val0, delta); if (zig > zigEnd) { @@ -2029,7 +2033,8 @@ namespace ImageSharp.Formats if (z != 0) { - b[Unzig[zig]] = z; + //b[Unzig[zig]] = z; + Block8x8F.SetScalarAt(b, Unzig[zig], z); } } } @@ -2051,12 +2056,15 @@ namespace ImageSharp.Formats /// The non-zero entry /// The low transform offset /// The - private int RefineNonZeroes(Block b, int zig, int zigEnd, int nz, int delta) + private int RefineNonZeroes(Block8x8F* b, int zig, int zigEnd, int nz, int delta) { for (; zig <= zigEnd; zig++) { int u = Unzig[zig]; - if (b[u] == 0) + float bu = Block8x8F.GetScalarAt(b, u); + + // TODO: Are the equality comparisons OK with floating point values? Isn't an epsilon value necessary? 
+ if (bu == 0) { if (nz == 0) { @@ -2073,13 +2081,15 @@ namespace ImageSharp.Formats continue; } - if (b[u] >= 0) + if (bu >= 0) { - b[u] += delta; + //b[u] += delta; + Block8x8F.SetScalarAt(b, u, bu + delta); } else { - b[u] -= delta; + //b[u] -= delta; + Block8x8F.SetScalarAt(b, u, bu - delta); } } @@ -2271,15 +2281,6 @@ namespace ImageSharp.Formats public void Dispose() { scanWorkerBlock.Dispose(); - Block.DisposeAll(this.quantizationTables); - - foreach (Block[] blocks in progCoeffs) - { - if (blocks != null) - { - Block.DisposeAll(blocks); - } - } for (int i = 0; i < huffmanTrees.Length; i++) { diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs new file mode 100644 index 000000000..814eafd87 --- /dev/null +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -0,0 +1,511 @@ +// Uncomment this to turn unit tests into benchmarks: +//#define BENCHMARKING + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Text; +using ImageSharp.Formats; +using Xunit; +using Xunit.Abstractions; +// ReSharper disable InconsistentNaming + +namespace ImageSharp.Tests.Formats.Jpg +{ + public class Block8x8FTests : UtilityTestClassBase + { +#if BENCHMARKING + public const int Times = 1000000; +#else + public const int Times = 1; +#endif + + public Block8x8FTests(ITestOutputHelper output) : base(output) + { + } + + [Fact] + public void Indexer() + { + float sum = 0; + Measure(Times, () => + { + Block8x8F block = new Block8x8F(); + + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + block[i] = i; + } + sum = 0; + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + sum += block[i]; + } + }); + Assert.Equal(sum, 64f*63f*0.5f); + } + + [Fact] + public unsafe void Indexer_GetScalarAt_SetScalarAt() + { + float sum = 0; + Measure(Times, () => + { + Block8x8F block = new 
Block8x8F(); + + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + Block8x8F.SetScalarAt(&block, i, i); + } + sum = 0; + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + sum += Block8x8F.GetScalarAt(&block, i); + } + }); + Assert.Equal(sum, 64f*63f*0.5f); + } + + [Fact] + public void Indexer_ReferenceBenchmarkWithArray() + { + float sum = 0; + + + Measure(Times, () => + { + //Block8x8F block = new Block8x8F(); + float[] block = new float[64]; + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + block[i] = i; + } + sum = 0; + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + sum += block[i]; + } + }); + Assert.Equal(sum, 64f*63f*0.5f); + } + + [Fact] + public void Load_Store_FloatArray() + { + float[] data = new float[Block8x8F.ScalarCount]; + float[] mirror = new float[Block8x8F.ScalarCount]; + + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + data[i] = i; + } + Measure(Times, () => + { + Block8x8F b = new Block8x8F(); + b.LoadFrom(data); + b.CopyTo(mirror); + }); + + Assert.Equal(data, mirror); + //PrintLinearData((MutableSpan)mirror); + } + + [Fact] + public unsafe void Load_Store_FloatArray_Ptr() + { + float[] data = new float[Block8x8F.ScalarCount]; + float[] mirror = new float[Block8x8F.ScalarCount]; + + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + data[i] = i; + } + Measure(Times, () => + { + Block8x8F b = new Block8x8F(); + Block8x8F.LoadFrom(&b, data); + Block8x8F.CopyTo(&b, mirror); + }); + + Assert.Equal(data, mirror); + //PrintLinearData((MutableSpan)mirror); + } + + [Fact] + public void Load_Store_IntArray() + { + int[] data = new int[Block8x8F.ScalarCount]; + int[] mirror = new int[Block8x8F.ScalarCount]; + + for (int i = 0; i < Block8x8F.ScalarCount; i++) + { + data[i] = i; + } + Measure(Times, () => + { + Block8x8F v = new Block8x8F(); + v.LoadFrom(data); + v.CopyTo(mirror); + }); + + Assert.Equal(data, mirror); + //PrintLinearData((MutableSpan)mirror); + } + + [Fact] + public void TransposeInplace() + { + float[] 
expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); + + Block8x8F buffer = new Block8x8F(); + buffer.LoadFrom(Create8x8FloatData()); + + buffer.TransposeInplace(); + + float[] actual = new float[64]; + buffer.CopyTo(actual); + + Assert.Equal(expected, actual); + } + + [Fact] + public void TranposeInto_PinningImpl() + { + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); + + Block8x8F source = new Block8x8F(); + source.LoadFrom(Create8x8FloatData()); + + Block8x8F dest = new Block8x8F(); + source.TransposeInto_PinningImpl(ref dest); + + float[] actual = new float[64]; + dest.CopyTo(actual); + + Assert.Equal(expected, actual); + } + + [Fact] + public void TransposeInto() + { + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); + + Block8x8F source = new Block8x8F(); + source.LoadFrom(Create8x8FloatData()); + + Block8x8F dest = new Block8x8F(); + source.TransposeInto(ref dest); + + float[] actual = new float[64]; + dest.CopyTo(actual); + + Assert.Equal(expected, actual); + } + + [Fact] + public void TransposeInto_CodeGeneratorTest() + { + char[] coordz = new[] {'X', 'Y', 'Z', 'W'}; + StringBuilder bld = new StringBuilder(); + + for (int i = 0; i < 8; i++) + { + char destCoord = coordz[i%4]; + char destSide = (i/4)%2 == 0 ? 'L' : 'R'; + + for (int j = 0; j < 8; j++) + { + char srcCoord = coordz[j%4]; + char srcSide = (j/4)%2 == 0 ? 
'L' : 'R'; + + string expression = $"d.V{j}{destSide}.{destCoord} = V{i}{srcSide}.{srcCoord}; "; + bld.Append(expression); + } + bld.AppendLine(); + } + + Output.WriteLine(bld.ToString()); + } + + + [Fact] + public unsafe void TransposeInto_WithPointers() + { + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); + + Block8x8F source = new Block8x8F(); + source.LoadFrom(Create8x8FloatData()); + + Block8x8F dest = new Block8x8F(); + + Block8x8F* sPtr = &source; + Block8x8F* dPtr = &dest; + + Block8x8F.TransposeInto(sPtr, dPtr); + + float[] actual = new float[64]; + dest.CopyTo(actual); + + Assert.Equal(expected, actual); + } + + private class BufferHolder + { + public Block8x8F Buffer; + } + + [Fact] + public void TranposeInto_Benchmark() + { + BufferHolder source = new BufferHolder(); + source.Buffer.LoadFrom(Create8x8FloatData()); + BufferHolder dest = new BufferHolder(); + + Output.WriteLine($"TranposeInto_PinningImpl_Benchmark X {Times} ..."); + Stopwatch sw = Stopwatch.StartNew(); + + for (int i = 0; i < Times; i++) + { + source.Buffer.TransposeInto(ref dest.Buffer); + } + + sw.Stop(); + Output.WriteLine($"TranposeInto_PinningImpl_Benchmark finished in {sw.ElapsedMilliseconds} ms"); + + } + + [Fact] + public void TranposeInto_PinningImpl_Benchmark() + { + BufferHolder source = new BufferHolder(); + source.Buffer.LoadFrom(Create8x8FloatData()); + BufferHolder dest = new BufferHolder(); + + Output.WriteLine($"TranposeInto_PinningImpl_Benchmark X {Times} ..."); + Stopwatch sw = Stopwatch.StartNew(); + + for (int i = 0; i < Times; i++) + { + source.Buffer.TransposeInto_PinningImpl(ref dest.Buffer); + } + + sw.Stop(); + Output.WriteLine($"TranposeInto_PinningImpl_Benchmark finished in {sw.ElapsedMilliseconds} ms"); + } + + [Fact] + public unsafe void TransposeInto_WithPointers_Benchmark() + { + BufferHolder source = new BufferHolder(); + source.Buffer.LoadFrom(Create8x8FloatData()); + BufferHolder dest = new BufferHolder(); + + 
fixed (Block8x8F* sPtr = &source.Buffer) + { + fixed (Block8x8F* dPtr = &dest.Buffer) + { + Output.WriteLine($"TransposeInto_WithPointers_Benchmark X {Times} ..."); + Stopwatch sw = Stopwatch.StartNew(); + + for (int i = 0; i < Times; i++) + { + Block8x8F.TransposeInto(sPtr, dPtr); + } + + sw.Stop(); + Output.WriteLine($"TransposeInto_WithPointers_Benchmark finished in {sw.ElapsedMilliseconds} ms"); + } + } + + } + + + [Fact] + public void iDCT2D8x4_LeftPart() + { + float[] sourceArray = Create8x8FloatData(); + float[] expectedDestArray = new float[64]; + + ReferenceImplementations.iDCT2D8x4_32f(sourceArray, expectedDestArray); + + Block8x8F source = new Block8x8F(); + source.LoadFrom(sourceArray); + + Block8x8F dest = new Block8x8F(); + + source.iDCT2D8x4_LeftPart(ref dest); + + float[] actualDestArray = new float[64]; + dest.CopyTo(actualDestArray); + + Print8x8Data(expectedDestArray); + Output.WriteLine("**************"); + Print8x8Data(actualDestArray); + + Assert.Equal(expectedDestArray, actualDestArray); + } + + [Fact] + public void iDCT2D8x4_RightPart() + { + MutableSpan sourceArray = Create8x8FloatData(); + MutableSpan expectedDestArray = new float[64]; + + ReferenceImplementations.iDCT2D8x4_32f(sourceArray.Slice(4), expectedDestArray.Slice(4)); + + Block8x8F source = new Block8x8F(); + source.LoadFrom(sourceArray); + + Block8x8F dest = new Block8x8F(); + + source.iDCT2D8x4_RightPart(ref dest); + + float[] actualDestArray = new float[64]; + dest.CopyTo(actualDestArray); + + Print8x8Data(expectedDestArray); + Output.WriteLine("**************"); + Print8x8Data(actualDestArray); + + Assert.Equal(expectedDestArray.Data, actualDestArray); + } + + private struct ApproximateFloatComparer : IEqualityComparer + { + private const float Eps = 0.0001f; + + public bool Equals(float x, float y) + { + float d = x - y; + + return d > -Eps && d < Eps; + } + + public int GetHashCode(float obj) + { + throw new InvalidOperationException(); + } + } + + [Fact] + public void 
IDCTInto() + { + float[] sourceArray = Create8x8FloatData(); + float[] expectedDestArray = new float[64]; + float[] tempArray = new float[64]; + + ReferenceImplementations.iDCT2D_llm(sourceArray, expectedDestArray, tempArray); + + //ReferenceImplementations.iDCT8x8_llm_sse(sourceArray, expectedDestArray, tempArray); + + Block8x8F source = new Block8x8F(); + source.LoadFrom(sourceArray); + + Block8x8F dest = new Block8x8F(); + Block8x8F tempBuffer = new Block8x8F(); + + source.IDCTInto(ref dest, ref tempBuffer); + + float[] actualDestArray = new float[64]; + dest.CopyTo(actualDestArray); + + Print8x8Data(expectedDestArray); + Output.WriteLine("**************"); + Print8x8Data(actualDestArray); + Assert.Equal(expectedDestArray, actualDestArray, new ApproximateFloatComparer()); + Assert.Equal(expectedDestArray, actualDestArray, new ApproximateFloatComparer()); + } + + + [Fact] + public unsafe void CopyColorsTo() + { + var data = Create8x8FloatData(); + Block8x8F block = new Block8x8F(); + block.LoadFrom(data); + block.MultiplyAllInplace(new Vector4(5, 5, 5, 5)); + + int stride = 256; + int height = 42; + int offset = height*10 + 20; + + byte[] colorsExpected = new byte[stride*height]; + byte[] colorsActual = new byte[stride*height]; + + Block8x8F temp = new Block8x8F(); + + ReferenceImplementations.CopyColorsTo(ref block, new MutableSpan(colorsExpected, offset), stride); + + block.CopyColorsTo(new MutableSpan(colorsActual, offset), stride, &temp); + + //Output.WriteLine("******* EXPECTED: *********"); + //PrintLinearData(colorsExpected); + //Output.WriteLine("******** ACTUAL: **********"); + + Assert.Equal(colorsExpected, colorsActual); + } + + [Fact] + public void CropInto() + { + Block8x8F block = new Block8x8F(); + block.LoadFrom(Create8x8FloatData()); + + Block8x8F dest = new Block8x8F(); + block.CropInto(10, 20, ref dest); + + float[] array = new float[64]; + dest.CopyTo(array); + PrintLinearData(array); + foreach (float val in array) + { + Assert.InRange(val, 
10, 20); + } + + } + + private static float[] Create8x8ColorCropTestData() + { + float[] result = new float[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[i * 8 + j] = -300 + i * 100 + j * 10; + } + } + return result; + } + + [Fact] + public void ColorifyInto() + { + Block8x8F block = new Block8x8F(); + var input = Create8x8ColorCropTestData(); + block.LoadFrom(input); + Output.WriteLine("Input:"); + PrintLinearData(input); + + + Block8x8F dest = new Block8x8F(); + block.ColorifyInto(ref dest); + + float[] array = new float[64]; + dest.CopyTo(array); + Output.WriteLine("Result:"); + PrintLinearData(array); + foreach (float val in array) + { + Assert.InRange(val, 0, 255); + } + } + + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Tests/Formats/Jpg/DctTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/DctTests.cs new file mode 100644 index 000000000..77f9fd98c --- /dev/null +++ b/tests/ImageSharp.Tests/Formats/Jpg/DctTests.cs @@ -0,0 +1,54 @@ +using System.Numerics; +using ImageSharp.Formats; +using Xunit; +using Xunit.Abstractions; + +namespace ImageSharp.Tests.Formats.Jpg +{ + public class DctTests : UtilityTestClassBase + { + public DctTests(ITestOutputHelper output) + : base(output) + { + } + + [Fact] + public void Mennyi() + { + Output.WriteLine(Vector.IsHardwareAccelerated.ToString()); + Output.WriteLine(Vector.Count.ToString()); + } + + [Fact] + public void CheckTestData() + { + var data = Create8x8FloatData(); + + Print8x8Data(data); + } + + [Fact] + public void Transpose8x8() + { + var data = Create8x8FloatData(); + + MutableSpan result = new MutableSpan(64); + + ReferenceImplementations.Transpose8x8(data, result); + + Print8x8Data(result.Data); + } + + [Fact] + public void Transpose8x8_Inplace() + { + var data = Create8x8FloatData(); + + ReferenceImplementations.Transpose8x8(data); + + Print8x8Data(data); + } + + + } +} \ No newline at end of file diff --git 
a/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs new file mode 100644 index 000000000..8cbc267af --- /dev/null +++ b/tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs @@ -0,0 +1,364 @@ +using System; +using System.Buffers; +using System.Numerics; +using System.Runtime.CompilerServices; +using ImageSharp.Formats; + +// ReSharper disable InconsistentNaming + +namespace ImageSharp.Tests.Formats.Jpg +{ + /// + /// This class contains simplified (inefficient) reference implementations so we can verify actual ones in unit tests + /// DCT code Ported from https://github.com/norishigefukushima/dct_simd + /// + public static class ReferenceImplementations + { + internal static void Transpose8x8(MutableSpan data) + { + for (int i = 1; i < 8; i++) + { + int i8 = i*8; + for (int j = 0; j < i; j++) + { + float tmp = data[i8 + j]; + data[i8 + j] = data[j*8 + i]; + data[j*8 + i] = tmp; + } + } + } + + internal static void Transpose8x8(MutableSpan src, MutableSpan dest) + { + for (int i = 0; i < 8; i++) + { + int i8 = i*8; + for (int j = 0; j < 8; j++) + { + dest[j*8 + i] = src[i8 + j]; + } + } + } + + internal static void iDCT1Dllm_32f(MutableSpan y, MutableSpan x) + { + float a0, a1, a2, a3, b0, b1, b2, b3; + float z0, z1, z2, z3, z4; + + float r0 = 1.414214f; + float r1 = 1.387040f; + float r2 = 1.306563f; + float r3 = 1.175876f; + float r4 = 1.000000f; + float r5 = 0.785695f; + float r6 = 0.541196f; + float r7 = 0.275899f; + + z0 = y[1] + y[7]; + z1 = y[3] + y[5]; + z2 = y[3] + y[7]; + z3 = y[1] + y[5]; + z4 = (z0 + z1)*r3; + + z0 = z0*(-r3 + r7); + z1 = z1*(-r3 - r1); + z2 = z2*(-r3 - r5) + z4; + z3 = z3*(-r3 + r5) + z4; + + b3 = y[7]*(-r1 + r3 + r5 - r7) + z0 + z2; + b2 = y[5]*(r1 + r3 - r5 + r7) + z1 + z3; + b1 = y[3]*(r1 + r3 + r5 - r7) + z1 + z2; + b0 = y[1]*(r1 + r3 - r5 - r7) + z0 + z3; + + z4 = (y[2] + y[6])*r6; + z0 = y[0] + y[4]; + z1 = y[0] - y[4]; + z2 = z4 - y[6]*(r2 + 
r6); + z3 = z4 + y[2]*(r2 - r6); + a0 = z0 + z3; + a3 = z0 - z3; + a1 = z1 + z2; + a2 = z1 - z2; + + x[0] = a0 + b0; + x[7] = a0 - b0; + x[1] = a1 + b1; + x[6] = a1 - b1; + x[2] = a2 + b2; + x[5] = a2 - b2; + x[3] = a3 + b3; + x[4] = a3 - b3; + } + + internal static void iDCT2D_llm(MutableSpan s, MutableSpan d, MutableSpan temp) + { + int j; + + for (j = 0; j < 8; j++) + { + iDCT1Dllm_32f(s.Slice(j*8), temp.Slice(j*8)); + } + + Transpose8x8(temp, d); + + for (j = 0; j < 8; j++) + { + iDCT1Dllm_32f(d.Slice(j*8), temp.Slice(j*8)); + } + + Transpose8x8(temp, d); + + for (j = 0; j < 64; j++) + { + d[j] *= 0.125f; + } + } + + + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 _mm_load_ps(MutableSpan src, int offset) + { + src = src.Slice(offset); + return new Vector4(src[0], src[1], src[2], src[3]); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 _mm_load_ps(MutableSpan src) + { + return new Vector4(src[0], src[1], src[2], src[3]); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void _mm_store_ps(MutableSpan dest, int offset, Vector4 src) + { + dest = dest.Slice(offset); + dest[0] = src.X; + dest[1] = src.Y; + dest[2] = src.Z; + dest[3] = src.W; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void _mm_store_ps(MutableSpan dest, Vector4 src) + { + dest[0] = src.X; + dest[1] = src.Y; + dest[2] = src.Z; + dest[3] = src.W; + } + + + private static readonly Vector4 _1_175876 = new Vector4(1.175876f); + private static readonly Vector4 _1_961571 = new Vector4(-1.961571f); + private static readonly Vector4 _0_390181 = new Vector4(-0.390181f); + private static readonly Vector4 _0_899976 = new Vector4(-0.899976f); + private static readonly Vector4 _2_562915 = new Vector4(-2.562915f); + private static readonly Vector4 _0_298631 = new Vector4(0.298631f); + private static readonly Vector4 _2_053120 = new Vector4(2.053120f); + private static readonly Vector4 
_3_072711 = new Vector4(3.072711f); + private static readonly Vector4 _1_501321 = new Vector4(1.501321f); + private static readonly Vector4 _0_541196 = new Vector4(0.541196f); + private static readonly Vector4 _1_847759 = new Vector4(-1.847759f); + private static readonly Vector4 _0_765367 = new Vector4(0.765367f); + + internal static void iDCT2D8x4_32f(MutableSpan y, MutableSpan x) + { + /* + float a0,a1,a2,a3,b0,b1,b2,b3; float z0,z1,z2,z3,z4; float r[8]; int i; + for(i = 0;i < 8;i++){ r[i] = (float)(cos((double)i / 16.0 * M_PI) * M_SQRT2); } + */ + /* + 0: 1.414214 + 1: 1.387040 + 2: 1.306563 + 3: + 4: 1.000000 + 5: 0.785695 + 6: + 7: 0.275899 + */ + + Vector4 my1 = _mm_load_ps(y, 8); + Vector4 my7 = _mm_load_ps(y, 56); + Vector4 mz0 = my1 + my7; + + Vector4 my3 = _mm_load_ps(y, 24); + Vector4 mz2 = my3 + my7; + Vector4 my5 = _mm_load_ps(y, 40); + Vector4 mz1 = my3 + my5; + Vector4 mz3 = my1 + my5; + + Vector4 mz4 = ((mz0 + mz1)* _1_175876); + //z0 = y[1] + y[7]; z1 = y[3] + y[5]; z2 = y[3] + y[7]; z3 = y[1] + y[5]; + //z4 = (z0 + z1) * r[3]; + + mz2 = mz2* _1_961571 + mz4; + mz3 = mz3* _0_390181 + mz4; + mz0 = mz0* _0_899976; + mz1 = mz1* _2_562915; + + /* + -0.899976 + -2.562915 + -1.961571 + -0.390181 + z0 = z0 * (-r[3] + r[7]); + z1 = z1 * (-r[3] - r[1]); + z2 = z2 * (-r[3] - r[5]) + z4; + z3 = z3 * (-r[3] + r[5]) + z4;*/ + + + Vector4 mb3 = my7* _0_298631 + mz0 + mz2; + Vector4 mb2 = my5* _2_053120 + mz1 + mz3; + Vector4 mb1 = my3* _3_072711 + mz1 + mz2; + Vector4 mb0 = my1* _1_501321 + mz0 + mz3; + + /* + 0.298631 + 2.053120 + 3.072711 + 1.501321 + b3 = y[7] * (-r[1] + r[3] + r[5] - r[7]) + z0 + z2; + b2 = y[5] * ( r[1] + r[3] - r[5] + r[7]) + z1 + z3; + b1 = y[3] * ( r[1] + r[3] + r[5] - r[7]) + z1 + z2; + b0 = y[1] * ( r[1] + r[3] - r[5] - r[7]) + z0 + z3; + */ + + Vector4 my2 = _mm_load_ps(y, 16); + Vector4 my6 = _mm_load_ps(y, 48); + mz4 = (my2 + my6)* _0_541196; + Vector4 my0 = _mm_load_ps(y, 0); + Vector4 my4 = _mm_load_ps(y, 32); + mz0 = my0 + my4; 
+ mz1 = my0 - my4; + + mz2 = mz4 + my6* _1_847759; + mz3 = mz4 + my2* _0_765367; + + my0 = mz0 + mz3; + my3 = mz0 - mz3; + my1 = mz1 + mz2; + my2 = mz1 - mz2; + /* + 1.847759 + 0.765367 + z4 = (y[2] + y[6]) * r[6]; + z0 = y[0] + y[4]; z1 = y[0] - y[4]; + z2 = z4 - y[6] * (r[2] + r[6]); + z3 = z4 + y[2] * (r[2] - r[6]); + a0 = z0 + z3; a3 = z0 - z3; + a1 = z1 + z2; a2 = z1 - z2; + */ + + _mm_store_ps(x, 0, my0 + mb0); + + _mm_store_ps(x, 56, my0 - mb0); + + _mm_store_ps(x, 8, my1 + mb1); + + _mm_store_ps(x, 48, my1 - mb1); + + _mm_store_ps(x, 16, my2 + mb2); + + _mm_store_ps(x, 40, my2 - mb2); + + _mm_store_ps(x, 24, my3 + mb3); + + _mm_store_ps(x, 32, my3 - mb3); + /* + x[0] = a0 + b0; x[7] = a0 - b0; + x[1] = a1 + b1; x[6] = a1 - b1; + x[2] = a2 + b2; x[5] = a2 - b2; + x[3] = a3 + b3; x[4] = a3 - b3; + for(i = 0;i < 8;i++){ x[i] *= 0.353554f; } + */ + } + + internal static void iDCT8x8_llm_sse(MutableSpan s, MutableSpan d, MutableSpan temp) + { + Transpose8x8(s, temp); + iDCT2D8x4_32f(temp, d); + + iDCT2D8x4_32f(temp.Slice(4), d.Slice(4)); + + Transpose8x8(d, temp); + + iDCT2D8x4_32f(temp, d); + + iDCT2D8x4_32f(temp.Slice(4), d.Slice(4)); + + Vector4 c = new Vector4(0.1250f); + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//0 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//1 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//2 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//3 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//4 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//5 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//6 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//7 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//8 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//9 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//10 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//11 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//12 + + _mm_store_ps(d, 
(_mm_load_ps(d)*c));d.AddOffset(4);//13 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//14 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//15 + } + + + internal static unsafe void CopyColorsTo(ref Block8x8F block, MutableSpan buffer, int stride) + { + fixed (Block8x8F* p = &block) + { + float* b = (float*)p; + + for (int y = 0; y < 8; y++) + { + int y8 = y * 8; + int yStride = y * stride; + + for (int x = 0; x < 8; x++) + { + float c = b[y8 + x]; + + if (c < -128) + { + c = 0; + } + else if (c > 127) + { + c = 255; + } + else + { + c += 128; + } + + buffer[yStride + x] = (byte)c; + } + } + } + + + } + + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Tests/Formats/Jpg/UtilityTestClassBase.cs b/tests/ImageSharp.Tests/Formats/Jpg/UtilityTestClassBase.cs new file mode 100644 index 000000000..55e609a52 --- /dev/null +++ b/tests/ImageSharp.Tests/Formats/Jpg/UtilityTestClassBase.cs @@ -0,0 +1,95 @@ +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Text; +using ImageSharp.Formats; +using Xunit.Abstractions; + +namespace ImageSharp.Tests.Formats.Jpg +{ + public class UtilityTestClassBase + { + public UtilityTestClassBase(ITestOutputHelper output) + { + Output = output; + } + + protected ITestOutputHelper Output { get; } + + // ReSharper disable once InconsistentNaming + public static float[] Create8x8FloatData() + { + float[] result = new float[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[i * 8 + j] = i * 10 + j; + } + } + return result; + } + + + + + // ReSharper disable once InconsistentNaming + public static int[] Create8x8IntData() + { + int[] result = new int[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[i * 8 + j] = i * 10 + j; + } + } + return result; + } + + internal void Print8x8Data(MutableSpan data) => Print8x8Data(data.Data); + + internal void Print8x8Data(T[] data) + { + StringBuilder bld = new StringBuilder(); 
+ for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + bld.Append($"{data[i * 8 + j],3} "); + } + bld.AppendLine(); + } + + Output.WriteLine(bld.ToString()); + } + + internal void PrintLinearData(T[] data) => PrintLinearData(new MutableSpan(data), data.Length); + + internal void PrintLinearData(MutableSpan data, int count = -1) + { + if (count < 0) count = data.TotalCount; + + StringBuilder bld = new StringBuilder(); + for (int i = 0; i < count; i++) + { + bld.Append($"{data[i],3} "); + } + Output.WriteLine(bld.ToString()); + } + + protected void Measure(int times, Action action, [CallerMemberName] string operationName = null) + { + Output.WriteLine($"{operationName} X {times} ..."); + Stopwatch sw = Stopwatch.StartNew(); + + for (int i = 0; i < times; i++) + { + action(); + } + + sw.Stop(); + Output.WriteLine($"{operationName} finished in {sw.ElapsedMilliseconds} ms"); + } + } +} \ No newline at end of file