diff --git a/src/ImageSharp46/Common/Extensions/ComparableExtensions.cs b/src/ImageSharp46/Common/Extensions/ComparableExtensions.cs index 8f056ff9d..6cc2eb588 100644 --- a/src/ImageSharp46/Common/Extensions/ComparableExtensions.cs +++ b/src/ImageSharp46/Common/Extensions/ComparableExtensions.cs @@ -3,6 +3,8 @@ // Licensed under the Apache License, Version 2.0. // +using System.Runtime.CompilerServices; + namespace ImageSharp { using System; @@ -94,6 +96,8 @@ namespace ImageSharp /// /// The representing the clamped value. /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float Clamp(this float value, float min, float max) { if (value > max) diff --git a/src/ImageSharp46/Formats/Jpg/Components/Block.cs b/src/ImageSharp46/Formats/Jpg/Components/Block.cs index ca9d15883..f41e615b5 100644 --- a/src/ImageSharp46/Formats/Jpg/Components/Block.cs +++ b/src/ImageSharp46/Formats/Jpg/Components/Block.cs @@ -5,13 +5,14 @@ using System; using System.Buffers; +using System.Runtime.CompilerServices; namespace ImageSharp.Formats { /// /// Represents an 8x8 block of coefficients to transform and encode. /// - internal struct Block : IDisposable + public struct Block : IDisposable { private static ArrayPool IntArrayPool = ArrayPool.Create(BlockSize, 50); @@ -67,7 +68,9 @@ namespace ImageSharp.Formats /// public int this[int index] { + [MethodImpl(MethodImplOptions.AggressiveInlining)] get { return this.Data[index]; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] set { this.Data[index] = value; } } @@ -98,5 +101,11 @@ namespace ImageSharp.Formats } } + public Block Clone() + { + Block clone = Create(); + Array.Copy(Data, clone.Data, BlockSize); + return clone; + } } } diff --git a/src/ImageSharp46/Formats/Jpg/Components/FloatIDCT.cs b/src/ImageSharp46/Formats/Jpg/Components/FloatIDCT.cs new file mode 100644 index 000000000..5f2bd68b9 --- /dev/null +++ b/src/ImageSharp46/Formats/Jpg/Components/FloatIDCT.cs @@ -0,0 +1,208 @@ +using System; +using System.Buffers; + +namespace ImageSharp.Formats +{ + internal class FloatIDCT + { + //private float[] _temp = new float[64]; + + // Cosine matrix and transposed cosine matrix + private static readonly float[] c = buildC(); + private static readonly float[] cT = buildCT(); + + internal FloatIDCT() + { +#if DYNAMIC_IDCT + dynamicIDCT = dynamicIDCT ?? EmitIDCT(); +#endif + } + + /// + /// Precomputes cosine terms in A.3.3 of + /// http://www.w3.org/Graphics/JPEG/itu-t81.pdf + /// + /// Closely follows the term precomputation in the + /// Java Advanced Imaging library. + /// + private static float[] buildC() + { + float[] c = new float[64]; + + for (int i = 0; i < 8; i++) // i == u or v + { + for (int j = 0; j < 8; j++) // j == x or y + { + c[i*8 + j] = i == 0 ? + 0.353553391f : /* 1 / SQRT(8) */ + (float)(0.5 * Math.Cos(((2.0 * j + 1) * i * Math.PI) / 16.0)); + } + } + + return c; + } + private static float[] buildCT() + { + // Transpose i,k <-- j,i + float[] cT = new float[64]; + for (int i = 0; i < 8; i++) + for (int j = 0; j < 8; j++) + cT[j * 8 + i] = c[i * 8 + j]; + return cT; + } + + public static void SetValueClipped(byte[,] arr, int i, int j, float val) + { + // Clip into the 0...255 range & round + arr[i, j] = val < 0 ? (byte)0 + : val > 255 ? (byte)255 + : (byte)(val + 0.5); + } + + public static void Transform(ref Block block) => FastIDCT(block.Data); + + /// See figure A.3.3 IDCT (informative) on A-5. + /// http://www.w3.org/Graphics/JPEG/itu-t81.pdf + public static void FastIDCT(int[] output) + { + //byte[,] output = new byte[8, 8]; + //int[] output = new int[64]; + + float[] _temp = ArrayPool.Shared.Rent(64); + + float[] input = ArrayPool.Shared.Rent(64); + + for (int i = 0; i < output.Length; i++) + { + input[i] = output[i]; + } + + float temp, val = 0; + int idx = 0; + for (int i = 0; i < 8; i++) + { + int i8 = i * 8; + for (int j = 0; j < 8; j++) + { + val = 0; + + for (int k = 0; k < 8; k++) + { + val += input[i8 + k] * c[k*8 + j]; + } + + _temp[idx++] = val; + } + } + for (int i = 0; i < 8; i++) + { + int i8 = i*8; + for (int j = 0; j < 8; j++) + { + temp = 128f; + + for (int k = 0; k < 8; k++) + { + temp += cT[i*8 + k] * _temp[k * 8 + j]; + } + + if (temp < 0) output[i8 + j] = 0; + else if (temp > 255) output[i8+ j] = 255; + else output[i8 + j] = (int)(temp + 0.5); // Implements rounding + } + } + + ArrayPool.Shared.Return(input, true); + ArrayPool.Shared.Return(_temp, true); + } + + + +#if DYNAMIC_IDCT + +/// +/// Generates a pure-IL nonbranching stream of instructions +/// that perform the inverse DCT. Relies on helper function +/// SetValueClipped. +/// +/// A delegate to the DynamicMethod + private static IDCTFunc EmitIDCT() + { + Type[] args = { typeof(float[]), typeof(float[]), typeof(byte[,]) }; + + DynamicMethod idctMethod = new DynamicMethod("dynamicIDCT", + null, // no return type + args); // input arrays + + ILGenerator il = idctMethod.GetILGenerator(); + + int idx = 0; + + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + il.Emit(OpCodes.Ldarg_1); // 1 {temp} + il.Emit(OpCodes.Ldc_I4_S, (short)idx++); // 3 {temp, idx} + + for (int k = 0; k < 8; k++) + { + il.Emit(OpCodes.Ldarg_0); // {in} + il.Emit(OpCodes.Ldc_I4_S, (short)(i * 8 + k)); // {in,idx} + il.Emit(OpCodes.Ldelem_R4); // {in[idx]} + il.Emit(OpCodes.Ldc_R4, c[k, j]); // {in[idx],c[k,j]} + il.Emit(OpCodes.Mul); // {in[idx]*c[k,j]} + if (k != 0) il.Emit(OpCodes.Add); + } + + il.Emit(OpCodes.Stelem_R4); // {} + } + } + + var meth = typeof(DCT).GetMethod("SetValueClipped", + BindingFlags.Static | BindingFlags.Public, null, + CallingConventions.Standard, + new Type[] { + typeof(byte[,]), // arr + typeof(int), // i + typeof(int), // j + typeof(float) } // val + , null); + + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + il.Emit(OpCodes.Ldarg_2); // {output} + il.Emit(OpCodes.Ldc_I4_S, (short)i); // {output,i} + il.Emit(OpCodes.Ldc_I4_S, (short)j); // X={output,i,j} + + il.Emit(OpCodes.Ldc_R4, 128.0f); // {X,128.0f} + + for (int k = 0; k < 8; k++) + { + il.Emit(OpCodes.Ldarg_1); // {X,temp} + il.Emit(OpCodes.Ldc_I4_S, + (short)(k * 8 + j)); // {X,temp,idx} + il.Emit(OpCodes.Ldelem_R4); // {X,temp[idx]} + il.Emit(OpCodes.Ldc_R4, cT[i, k]); // {X,temp[idx],cT[i,k]} + il.Emit(OpCodes.Mul); // {X,in[idx]*c[k,j]} + il.Emit(OpCodes.Add); + } + + il.EmitCall(OpCodes.Call, meth, null); + } + } + + il.Emit(OpCodes.Ret); + + return (IDCTFunc)idctMethod.CreateDelegate(typeof(IDCTFunc)); + } + + private delegate void IDCTFunc(float[] input, float[] temp, byte[,] output); + private static IDCTFunc dynamicIDCT = null; +#endif + + + } +} \ No newline at end of file diff --git a/src/ImageSharp46/Formats/Jpg/Components/Huffman.cs b/src/ImageSharp46/Formats/Jpg/Components/Huffman.cs index cef99ea1a..345b45e0e 100644 --- a/src/ImageSharp46/Formats/Jpg/Components/Huffman.cs +++ b/src/ImageSharp46/Formats/Jpg/Components/Huffman.cs @@ -3,36 +3,41 @@ // Licensed under the Apache License, Version 2.0. // +using System; +using System.Buffers; + namespace ImageSharp.Formats { /// /// Represents a Huffman tree /// - internal struct Huffman + internal struct Huffman : IDisposable { - /// - /// Initializes a new instance of the class. - /// - /// The log-2 size of the Huffman decoder's look-up table. - /// The maximum (inclusive) number of codes in a Huffman tree. - /// The maximum (inclusive) number of bits in a Huffman code. - //public Huffman(int lutSize, int maxNCodes, int maxCodeLength) - //{ - // this.Lut = new ushort[1 << lutSize]; - // this.Values = new byte[maxNCodes]; - // this.MinCodes = new int[maxCodeLength]; - // this.MaxCodes = new int[maxCodeLength]; - // this.Indices = new int[maxCodeLength]; - // this.Length = 0; - //} + private static ArrayPool UshortBuffer = + //ArrayPool.Shared; + ArrayPool.Create(1 << JpegDecoderCore.LutSize, 50); + + private static ArrayPool ByteBuffer = + //ArrayPool.Shared; + ArrayPool.Create(JpegDecoderCore.MaxNCodes, 50); + + private static readonly ArrayPool IntBuffer = + //ArrayPool.Shared; + ArrayPool.Create(JpegDecoderCore.MaxCodeLength, 50); public void Init(int lutSize, int maxNCodes, int maxCodeLength) { - this.Lut = new ushort[1 << lutSize]; - this.Values = new byte[maxNCodes]; - this.MinCodes = new int[maxCodeLength]; - this.MaxCodes = new int[maxCodeLength]; - this.Indices = new int[maxCodeLength]; + //this.Lut = new ushort[1 << lutSize]; + //this.Values = new byte[maxNCodes]; + //this.MinCodes = new int[maxCodeLength]; + //this.MaxCodes = new int[maxCodeLength]; + //this.Indices = new int[maxCodeLength]; + + this.Lut = UshortBuffer.Rent(1 << lutSize); + this.Values = ByteBuffer.Rent(maxNCodes); + this.MinCodes = IntBuffer.Rent(maxCodeLength); + this.MaxCodes = IntBuffer.Rent(maxCodeLength); ; + this.Indices = IntBuffer.Rent(maxCodeLength); ; } /// @@ -69,6 +74,15 @@ namespace ImageSharp.Formats /// Gets the array of indices. Indices[i] is the index into Values of MinCodes[i]. /// public int[] Indices; + + public void Dispose() + { + UshortBuffer.Return(Lut, true); + ByteBuffer.Return(Values, true); + IntBuffer.Return(MinCodes, true); + IntBuffer.Return(MaxCodes, true); + IntBuffer.Return(Indices, true); + } } diff --git a/src/ImageSharp46/Formats/Jpg/Components/IDCT.cs b/src/ImageSharp46/Formats/Jpg/Components/IDCT.cs index 4ca4ba903..88b493c4b 100644 --- a/src/ImageSharp46/Formats/Jpg/Components/IDCT.cs +++ b/src/ImageSharp46/Formats/Jpg/Components/IDCT.cs @@ -167,5 +167,6 @@ namespace ImageSharp.Formats src[56 + x] = (y7 - y1) >> 14; } } + } } diff --git a/src/ImageSharp46/Formats/Jpg/Components/MagicDCT.cs b/src/ImageSharp46/Formats/Jpg/Components/MagicDCT.cs new file mode 100644 index 000000000..6c71ec92e --- /dev/null +++ b/src/ImageSharp46/Formats/Jpg/Components/MagicDCT.cs @@ -0,0 +1,572 @@ +using System; +using System.Buffers; +using System.Numerics; +using System.Runtime.CompilerServices; + +namespace ImageSharp.Formats +{ + public struct Span + where T : struct + { + public T[] Data; + public int Offset; + + public Span(int size, int offset = 0) + { + Data = new T[size]; + Offset = offset; + } + + public Span(T[] data, int offset = 0) + { + Data = data; + Offset = offset; + } + + public T this[int idx] + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] get { return Data[idx + Offset]; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] set { Data[idx + Offset] = value; } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Span Slice(int offset) + { + return new Span(Data, Offset + offset); + } + + public static implicit operator Span(T[] data) => new Span(data, 0); + + private static readonly ArrayPool Pool = ArrayPool.Create(128, 10); + + public static Span RentFromPool(int size, int offset = 0) + { + return new Span(Pool.Rent(size), offset); + } + + public void ReturnToPool() + { + Pool.Return(Data, true); + Data = null; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void AddOffset(int offset) + { + Offset += offset; + } + } + + public static class MagicDCT + { + private static readonly ArrayPool FloatArrayPool = ArrayPool.Create(Block.BlockSize, 50); + + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Matrix4x4 Load(Span src, int x, int y) + { + int b0 = y*8 + x; + y++; + int b1 = y*8 + x; + y++; + int b2 = y*8 + x; + y++; + int b3 = y*8 + x; + + return new Matrix4x4( + src[b0], src[b0 + 1], src[b0 + 2], src[b0 + 3], + src[b1], src[b1 + 1], src[b1 + 2], src[b1 + 3], + src[b2], src[b2 + 1], src[b2 + 2], src[b2 + 3], + src[b3], src[b3 + 1], src[b3 + 2], src[b3 + 3] + ); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void Store(Matrix4x4 s, Span d, int x, int y) + { + int b0 = y*8 + x; + y++; + int b1 = y*8 + x; + y++; + int b2 = y*8 + x; + y++; + int b3 = y*8 + x; + + d[b0] = s.M11; + d[b0 + 1] = s.M12; + d[b0 + 2] = s.M13; + d[b0 + 3] = s.M14; + d[b1] = s.M21; + d[b1 + 1] = s.M22; + d[b1 + 2] = s.M23; + d[b1 + 3] = s.M24; + d[b2] = s.M31; + d[b2 + 1] = s.M32; + d[b2 + 2] = s.M33; + d[b2 + 3] = s.M34; + d[b3] = s.M41; + d[b3 + 1] = s.M42; + d[b3 + 2] = s.M43; + d[b3 + 3] = s.M44; + } + + public static void Transpose8x8_SSE_Slow(Span data) + { + Matrix4x4 a11 = Load(data, 0, 0); + Matrix4x4 a12 = Load(data, 4, 0); + Matrix4x4 a21 = Load(data, 0, 4); + Matrix4x4 a22 = Load(data, 4, 4); + + a11 = Matrix4x4.Transpose(a11); + a12 = Matrix4x4.Transpose(a12); + a21 = Matrix4x4.Transpose(a21); + a22 = Matrix4x4.Transpose(a22); + + Store(a11, data, 0, 0); + Store(a21, data, 4, 0); + Store(a12, data, 0, 4); + Store(a22, data, 4, 4); + } + + public static void Transpose8x8_SSE_Slow(Span src, Span dest) + { + Matrix4x4 a11 = Load(src, 0, 0); + Matrix4x4 a12 = Load(src, 4, 0); + Matrix4x4 a21 = Load(src, 0, 4); + Matrix4x4 a22 = Load(src, 4, 4); + + a11 = Matrix4x4.Transpose(a11); + a12 = Matrix4x4.Transpose(a12); + a21 = Matrix4x4.Transpose(a21); + a22 = Matrix4x4.Transpose(a22); + + Store(a11, dest, 0, 0); + Store(a21, dest, 4, 0); + Store(a12, dest, 0, 4); + Store(a22, dest, 4, 4); + } + + public static void Transpose8x8(Span data) + { + for (int i = 1; i < 8; i++) + { + int i8 = i*8; + for (int j = 0; j < i; j++) + { + float tmp = data[i8 + j]; + data[i8 + j] = data[j*8 + i]; + data[j*8 + i] = tmp; + } + } + } + + public static void Transpose8x8(Span src, Span dest) + { + for (int i = 0; i < 8; i++) + { + int i8 = i*8; + for (int j = 0; j < 8; j++) + { + dest[j*8 + i] = src[i8 + j]; + } + } + + //Matrix4x4 a11 = Load(src, 0, 0); + //Matrix4x4 a12 = Load(src, 4, 0); + //Matrix4x4 a21 = Load(src, 0, 4); + //Matrix4x4 a22 = Load(src, 4, 4); + + //a11 = Matrix4x4.Transpose(a11); + //a12 = Matrix4x4.Transpose(a12); + //a21 = Matrix4x4.Transpose(a21); + //a22 = Matrix4x4.Transpose(a22); + + //Store(a11, dest, 0, 0); + //Store(a21, dest, 4, 0); + //Store(a12, dest, 0, 4); + //Store(a22, dest, 4, 4); + } + + public static void iDCT1Dllm_32f(Span y, Span x) + { + float a0, a1, a2, a3, b0, b1, b2, b3; + float z0, z1, z2, z3, z4; + + float r0 = 1.414214f; + float r1 = 1.387040f; + float r2 = 1.306563f; + float r3 = 1.175876f; + float r4 = 1.000000f; + float r5 = 0.785695f; + float r6 = 0.541196f; + float r7 = 0.275899f; + + z0 = y[1] + y[7]; + z1 = y[3] + y[5]; + z2 = y[3] + y[7]; + z3 = y[1] + y[5]; + z4 = (z0 + z1)*r3; + + z0 = z0*(-r3 + r7); + z1 = z1*(-r3 - r1); + z2 = z2*(-r3 - r5) + z4; + z3 = z3*(-r3 + r5) + z4; + + b3 = y[7]*(-r1 + r3 + r5 - r7) + z0 + z2; + b2 = y[5]*(r1 + r3 - r5 + r7) + z1 + z3; + b1 = y[3]*(r1 + r3 + r5 - r7) + z1 + z2; + b0 = y[1]*(r1 + r3 - r5 - r7) + z0 + z3; + + z4 = (y[2] + y[6])*r6; + z0 = y[0] + y[4]; + z1 = y[0] - y[4]; + z2 = z4 - y[6]*(r2 + r6); + z3 = z4 + y[2]*(r2 - r6); + a0 = z0 + z3; + a3 = z0 - z3; + a1 = z1 + z2; + a2 = z1 - z2; + + x[0] = a0 + b0; + x[7] = a0 - b0; + x[1] = a1 + b1; + x[6] = a1 - b1; + x[2] = a2 + b2; + x[5] = a2 - b2; + x[3] = a3 + b3; + x[4] = a3 - b3; + } + + public static void iDCT2D_llm(Span s, Span d, Span temp) + { + int j; + + for (j = 0; j < 8; j++) + { + iDCT1Dllm_32f(s.Slice(j*8), temp.Slice(j*8)); + } + + Transpose8x8(temp, d); + + for (j = 0; j < 8; j++) + { + iDCT1Dllm_32f(d.Slice(j*8), temp.Slice(j*8)); + } + + Transpose8x8(temp, d); + + for (j = 0; j < 64; j++) + { + d[j] *= 0.125f; + } + } + + public static void IDCT(ref Block block) + { + Span src = Span.RentFromPool(64); + + for (int i = 0; i < 64; i++) + { + src[i] = block[i]; + } + + Span dest = Span.RentFromPool(64); + Span temp = Span.RentFromPool(64); + + //iDCT2D_llm(src, dest, temp); + //iDCT8x8GT(src, dest); + iDCT8x8_llm_sse(src, dest, temp); + + for (int i = 0; i < 64; i++) + { + block[i] = (int) (dest[i] + 0.5f); + } + + src.ReturnToPool(); + dest.ReturnToPool(); + temp.ReturnToPool(); + } + + public static void iDCT8x8GT(Span s, Span d) + { + idct81d_sse_GT(s, d); + + Transpose8x8(d); + + idct81d_sse_GT(d, d); + + Transpose8x8(d); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 _mm_load_ps(Span src, int offset) + { + src = src.Slice(offset); + return new Vector4(src[0], src[1], src[2], src[3]); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 _mm_load_ps(Span src) + { + return new Vector4(src[0], src[1], src[2], src[3]); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void _mm_store_ps(Span dest, int offset, Vector4 src) + { + dest = dest.Slice(offset); + dest[0] = src.X; + dest[1] = src.Y; + dest[2] = src.Z; + dest[3] = src.W; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void _mm_store_ps(Span dest, Vector4 src) + { + dest[0] = src.X; + dest[1] = src.Y; + dest[2] = src.Z; + dest[3] = src.W; + } + + + public static void idct81d_sse_GT(Span src, Span dst) + { + Vector4 c1414 = new Vector4(1.4142135623731f); + Vector4 c0250 = new Vector4(0.25f); + Vector4 c0353 = new Vector4(0.353553390593274f); + Vector4 c0707 = new Vector4(0.707106781186547f); + + for (int i = 0; i < 2; i++) + { + Vector4 ms0 = _mm_load_ps(src, 0); + Vector4 ms1 = _mm_load_ps(src, 8); + Vector4 ms2 = _mm_load_ps(src, 16); + Vector4 ms3 = _mm_load_ps(src, 24); + Vector4 ms4 = _mm_load_ps(src, 32); + Vector4 ms5 = _mm_load_ps(src, 40); + Vector4 ms6 = _mm_load_ps(src, 48); + Vector4 ms7 = _mm_load_ps(src, 56); + + Vector4 mx00 = (c1414*ms0); + + Vector4 mx01 = ((new Vector4(1.38703984532215f)*ms1) + (new Vector4(0.275899379282943f)*ms7)); + Vector4 mx02 = ((new Vector4(1.30656296487638f)*ms2) + (new Vector4(0.541196100146197f)*ms6)); + Vector4 mx03 = ((new Vector4(1.17587560241936f)*ms3) + (new Vector4(0.785694958387102f)*ms5)); + + Vector4 mx04 = (c1414*ms4); + + Vector4 mx05 = ((new Vector4(-0.785694958387102f)*ms3) + (new Vector4(+1.17587560241936f)*ms5)); + Vector4 mx06 = ((new Vector4(0.541196100146197f)*ms2) + (new Vector4(-1.30656296487638f)*ms6)); + Vector4 mx07 = ((new Vector4(-0.275899379282943f)*ms1) + (new Vector4(1.38703984532215f)*ms7)); + Vector4 mx09 = (mx00 + mx04); + Vector4 mx0a = (mx01 + mx03); + + Vector4 mx0b = (c1414*mx02); + + Vector4 mx0c = (mx00 - mx04); + Vector4 mx0d = (mx01 - mx03); + + Vector4 mx0e = (c0353*(mx09 - mx0b)); + Vector4 mx0f = (c0353*(mx0c - mx0d)); + Vector4 mx10 = (c0353*(mx0c - mx0d)); + Vector4 mx11 = (c1414*mx06); + + Vector4 mx12 = (mx05 + mx07); + + Vector4 mx13 = (mx05 - mx07); + + Vector4 mx14 = (c0353*(mx11 + mx12)); + Vector4 mx15 = (c0353*(mx11 - mx12)); + Vector4 mx16 = (new Vector4(0.5f)*mx13); + + _mm_store_ps(dst, 0, ((c0250 + (mx09 + mx0b))*(c0353*mx0a))); + _mm_store_ps(dst, 8, (c0707*(mx0f + mx15))); + _mm_store_ps(dst, 16, (c0707*(mx0f - mx15))); + _mm_store_ps(dst, 24, (c0707*(mx0e + mx16))); + _mm_store_ps(dst, 32, (c0707*(mx0e - mx16))); + _mm_store_ps(dst, 40, (c0707*(mx10 - mx14))); + _mm_store_ps(dst, 48, (c0707*(mx10 + mx14))); + + _mm_store_ps(dst, 56, ((c0250*(mx09 + mx0b)) - (c0353*mx0a))); + + dst = dst.Slice(4); + src = src.Slice(4); + } + } + + private static readonly Vector4 _1_175876 = new Vector4(1.175876f); + private static readonly Vector4 _1_961571 = new Vector4(-1.961571f); + private static readonly Vector4 _0_390181 = new Vector4(-0.390181f); + private static readonly Vector4 _0_899976 = new Vector4(-0.899976f); + private static readonly Vector4 _2_562915 = new Vector4(-2.562915f); + private static readonly Vector4 _0_298631 = new Vector4(0.298631f); + private static readonly Vector4 _2_053120 = new Vector4(2.053120f); + private static readonly Vector4 _3_072711 = new Vector4(3.072711f); + private static readonly Vector4 _1_501321 = new Vector4(1.501321f); + private static readonly Vector4 _0_541196 = new Vector4(0.541196f); + private static readonly Vector4 _1_847759 = new Vector4(-1.847759f); + private static readonly Vector4 _0_765367 = new Vector4(0.765367f); + + public static void iDCT2D8x4_32f(Span y, Span x) + { + /* + float a0,a1,a2,a3,b0,b1,b2,b3; float z0,z1,z2,z3,z4; float r[8]; int i; + for(i = 0;i < 8;i++){ r[i] = (float)(cos((double)i / 16.0 * M_PI) * M_SQRT2); } + */ + /* + 0: 1.414214 + 1: 1.387040 + 2: 1.306563 + 3: + 4: 1.000000 + 5: 0.785695 + 6: + 7: 0.275899 + */ + Vector4 my1 = _mm_load_ps(y, 8); + Vector4 my7 = _mm_load_ps(y, 56); + Vector4 mz0 = my1 + my7; + + Vector4 my3 = _mm_load_ps(y, 24); + Vector4 mz2 = my3 + my7; + Vector4 my5 = _mm_load_ps(y, 40); + Vector4 mz1 = my3 + my5; + Vector4 mz3 = my1 + my5; + + Vector4 mz4 = ((mz0 + mz1)* _1_175876); + //z0 = y[1] + y[7]; z1 = y[3] + y[5]; z2 = y[3] + y[7]; z3 = y[1] + y[5]; + //z4 = (z0 + z1) * r[3]; + + mz2 = mz2* _1_961571 + mz4; + mz3 = mz3* _0_390181 + mz4; + mz0 = mz0* _0_899976; + mz1 = mz1* _2_562915; + + /* + -0.899976 + -2.562915 + -1.961571 + -0.390181 + z0 = z0 * (-r[3] + r[7]); + z1 = z1 * (-r[3] - r[1]); + z2 = z2 * (-r[3] - r[5]) + z4; + z3 = z3 * (-r[3] + r[5]) + z4;*/ + + + Vector4 mb3 = my7* _0_298631 + mz0 + mz2; + Vector4 mb2 = my5* _2_053120 + mz1 + mz3; + Vector4 mb1 = my3* _3_072711 + mz1 + mz2; + Vector4 mb0 = my1* _1_501321 + mz0 + mz3; + + /* + 0.298631 + 2.053120 + 3.072711 + 1.501321 + b3 = y[7] * (-r[1] + r[3] + r[5] - r[7]) + z0 + z2; + b2 = y[5] * ( r[1] + r[3] - r[5] + r[7]) + z1 + z3; + b1 = y[3] * ( r[1] + r[3] + r[5] - r[7]) + z1 + z2; + b0 = y[1] * ( r[1] + r[3] - r[5] - r[7]) + z0 + z3; + */ + + Vector4 my2 = _mm_load_ps(y, 16); + Vector4 my6 = _mm_load_ps(y, 48); + mz4 = (my2 + my6)* _0_541196; + Vector4 my0 = _mm_load_ps(y, 0); + Vector4 my4 = _mm_load_ps(y, 32); + mz0 = my0 + my4; + mz1 = my0 - my4; + + mz2 = mz4 + my6* _1_847759; + mz3 = mz4 + my2* _0_765367; + + my0 = mz0 + mz3; + my3 = mz0 - mz3; + my1 = mz1 + mz2; + my2 = mz1 - mz2; + /* + 1.847759 + 0.765367 + z4 = (y[2] + y[6]) * r[6]; + z0 = y[0] + y[4]; z1 = y[0] - y[4]; + z2 = z4 - y[6] * (r[2] + r[6]); + z3 = z4 + y[2] * (r[2] - r[6]); + a0 = z0 + z3; a3 = z0 - z3; + a1 = z1 + z2; a2 = z1 - z2; + */ + + _mm_store_ps(x, 0, my0 + mb0); + + _mm_store_ps(x, 56, my0 - mb0); + + _mm_store_ps(x, 8, my1 + mb1); + + _mm_store_ps(x, 48, my1 - mb1); + + _mm_store_ps(x, 16, my2 + mb2); + + _mm_store_ps(x, 40, my2 - mb2); + + _mm_store_ps(x, 24, my3 + mb3); + + _mm_store_ps(x, 32, my3 - mb3); + /* + x[0] = a0 + b0; x[7] = a0 - b0; + x[1] = a1 + b1; x[6] = a1 - b1; + x[2] = a2 + b2; x[5] = a2 - b2; + x[3] = a3 + b3; x[4] = a3 - b3; + for(i = 0;i < 8;i++){ x[i] *= 0.353554f; } + */ + } + + public static void iDCT8x8_llm_sse(Span s, Span d, Span temp) + { + Transpose8x8(s, temp); + iDCT2D8x4_32f(temp, d); + + iDCT2D8x4_32f(temp.Slice(4), d.Slice(4)); + + Transpose8x8(d, temp); + + iDCT2D8x4_32f(temp, d); + + iDCT2D8x4_32f(temp.Slice(4), d.Slice(4)); + + Vector4 c = new Vector4(0.1250f); + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//0 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//1 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//2 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//3 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//4 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//5 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//6 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//7 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//8 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//9 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//10 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//11 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//12 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//13 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//14 + + _mm_store_ps(d, (_mm_load_ps(d)*c));d.AddOffset(4);//15 + } +} +} \ No newline at end of file diff --git a/src/ImageSharp46/Formats/Jpg/JpegDecoderCore.cs b/src/ImageSharp46/Formats/Jpg/JpegDecoderCore.cs index e5191069a..2f0b22829 100644 --- a/src/ImageSharp46/Formats/Jpg/JpegDecoderCore.cs +++ b/src/ImageSharp46/Formats/Jpg/JpegDecoderCore.cs @@ -3,6 +3,8 @@ // Licensed under the Apache License, Version 2.0. // +using System.Runtime.CompilerServices; + namespace ImageSharp.Formats { using System; @@ -17,17 +19,17 @@ namespace ImageSharp.Formats /// /// The maximum (inclusive) number of bits in a Huffman code. /// - private const int MaxCodeLength = 16; + internal const int MaxCodeLength = 16; /// /// The maximum (inclusive) number of codes in a Huffman tree. /// - private const int MaxNCodes = 256; + internal const int MaxNCodes = 256; /// /// The log-2 size of the Huffman decoder's look-up table. /// - private const int LutSize = 8; + internal const int LutSize = 8; /// /// The maximum number of color components @@ -1401,7 +1403,7 @@ namespace ImageSharp.Formats byte cr = this.ycbcrImage.CrChannel[co + (x / scale)]; TColor packed = default(TColor); - this.PackYcbCr(ref packed, yy, cb, cr); + PackYcbCr(ref packed, yy, cb, cr); pixels[x, y] = packed; } }); @@ -1497,8 +1499,8 @@ namespace ImageSharp.Formats this.ReadFull(this.temp, 0, remaining); byte scanComponentCount = this.temp[0]; - int scanComponentCountBy2 = 2 * scanComponentCount; - if (remaining != 4 + scanComponentCountBy2) + int scanComponentCountX2 = 2 * scanComponentCount; + if (remaining != 4 + scanComponentCountX2) { throw new ImageFormatException("SOS length inconsistent with number of components"); } @@ -1539,10 +1541,10 @@ namespace ImageSharp.Formats if (this.isProgressive) { - zigStart = this.temp[1 + scanComponentCountBy2]; - zigEnd = this.temp[2 + scanComponentCountBy2]; - ah = this.temp[3 + scanComponentCountBy2] >> 4; - al = this.temp[3 + scanComponentCountBy2] & 0x0f; + zigStart = this.temp[1 + scanComponentCountX2]; + zigEnd = this.temp[2 + scanComponentCountX2]; + ah = this.temp[3 + scanComponentCountX2] >> 4; + al = this.temp[3 + scanComponentCountX2] & 0x0f; if ((zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || Block.BlockSize <= zigEnd) { @@ -1655,11 +1657,7 @@ namespace ImageSharp.Formats var qtIndex = this.componentArray[compIndex].Selector; - // Load the previous partially decoded coefficients, if applicable. - - //b = this.isProgressive ? this.progCoeffs[compIndex][blockIndex] : new Block(); - - if (this.isProgressive) + if (this.isProgressive) // Load the previous partially decoded coefficients, if applicable. { blockIndex = ((@by * mxx) * hi) + bx; ProcessBlockImpl(ah, @@ -1796,9 +1794,7 @@ namespace ImageSharp.Formats { // We haven't completely decoded this 8x8 block. Save the coefficients. - // TODO: This should be broken when isProgressive == true - - this.progCoeffs[compIndex][((@by*mxx)*hi) + bx] = b; + this.progCoeffs[compIndex][((@by*mxx)*hi) + bx] = b.Clone(); // At this point, we could execute the rest of the loop body to dequantize and // perform the inverse DCT, to save early stages of a progressive image to the @@ -1815,7 +1811,9 @@ namespace ImageSharp.Formats b[Unzig[zig]] *= qt[zig]; } - IDCT.Transform(ref b); + //IDCT.Transform(ref b); + //FloatIDCT.Transform(ref b); + MagicDCT.IDCT(ref b); byte[] dst; int offset; @@ -2168,7 +2166,8 @@ namespace ImageSharp.Formats /// The y luminance component. /// The cb chroma component. /// The cr chroma component. - private void PackYcbCr(ref TColor packed, byte y, byte cb, byte cr) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void PackYcbCr(ref TColor packed, byte y, byte cb, byte cr) where TColor : struct, IPackedPixel where TPacked : struct { @@ -2273,6 +2272,11 @@ namespace ImageSharp.Formats Block.DisposeAll(blocks); } } + + for (int i = 0; i < huffmanTrees.Length; i++) + { + huffmanTrees[i].Dispose(); + } } } } diff --git a/src/ImageSharp46/ImageSharp46.csproj b/src/ImageSharp46/ImageSharp46.csproj index e88dbb0e2..fdb1b7293 100644 --- a/src/ImageSharp46/ImageSharp46.csproj +++ b/src/ImageSharp46/ImageSharp46.csproj @@ -230,9 +230,11 @@ + + diff --git a/tests/ImageSharp.Tests46/DctSandbox.cs b/tests/ImageSharp.Tests46/DctSandbox.cs new file mode 100644 index 000000000..d3b1bacb2 --- /dev/null +++ b/tests/ImageSharp.Tests46/DctSandbox.cs @@ -0,0 +1,97 @@ +using System.Numerics; +using System.Text; +using ImageSharp.Formats; +using Xunit; +using Xunit.Abstractions; + +namespace ImageSharp.Tests +{ + public class DctSandbox + { + + private ITestOutputHelper Output { get; } + + public DctSandbox(ITestOutputHelper output) + { + Output = output; + } + + private float[] CreateTestData() + { + float[] result =new float[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[i*8 + j] = i*10 + j; + } + } + return result; + } + + private void Print(float[] data) + { + StringBuilder bld = new StringBuilder(); + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + bld.Append($"{data[i * 8 + j],3} "); + } + bld.AppendLine(); + } + + Output.WriteLine(bld.ToString()); + } + + [Fact] + public void Mennyi() + { + Output.WriteLine(Vector.IsHardwareAccelerated.ToString()); + Output.WriteLine(Vector.Count.ToString()); + } + + [Fact] + public void CheckTestData() + { + var data = CreateTestData(); + + Print(data); + } + + [Fact] + public void Load_Store() + { + var data = CreateTestData(); + + var m = MagicDCT.Load(data, 1, 1); + m = Matrix4x4.Transpose(m); + + MagicDCT.Store(m, data, 4, 4); + + Print(data); + } + + [Fact] + public void Transpose8x8() + { + var data = CreateTestData(); + + Span result = new Span(64); + + MagicDCT.Transpose8x8(data, result); + + Print(result.Data); + } + + [Fact] + public void Transpose8x8_Inplace() + { + var data = CreateTestData(); + + MagicDCT.Transpose8x8(data); + + Print(data); + } + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Tests46/Formats/Bmp/BitmapTests.cs b/tests/ImageSharp.Tests46/Formats/Bmp/BitmapTests.cs index 549ac05ef..c91b0ad1b 100644 --- a/tests/ImageSharp.Tests46/Formats/Bmp/BitmapTests.cs +++ b/tests/ImageSharp.Tests46/Formats/Bmp/BitmapTests.cs @@ -3,6 +3,8 @@ // Licensed under the Apache License, Version 2.0. // +using ImageSharp.Formats; + namespace ImageSharp.Tests { using System.IO; diff --git a/tests/ImageSharp.Tests46/Formats/Jpg/JpegTests.cs b/tests/ImageSharp.Tests46/Formats/Jpg/JpegTests.cs new file mode 100644 index 000000000..57bce1504 --- /dev/null +++ b/tests/ImageSharp.Tests46/Formats/Jpg/JpegTests.cs @@ -0,0 +1,85 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using ImageSharp.Formats; +using Xunit; +using Xunit.Abstractions; + +namespace ImageSharp.Tests.Formats.Jpg +{ + public class JpegTests + { + + public const string TestOutputDirectory = "TestOutput/Jpeg"; + + private ITestOutputHelper Output { get; } + + public JpegTests(ITestOutputHelper output) + { + Output = output; + } + + protected string CreateTestOutputFile(string fileName) + { + if (!Directory.Exists(TestOutputDirectory)) + { + Directory.CreateDirectory(TestOutputDirectory); + } + + //string id = Guid.NewGuid().ToString().Substring(0, 4); + + string ext = Path.GetExtension(fileName); + fileName = Path.GetFileNameWithoutExtension(fileName); + + return $"{TestOutputDirectory}/{fileName}{ext}"; + } + + protected Stream CreateOutputStream(string fileName) + { + fileName = CreateTestOutputFile(fileName); + Output?.WriteLine("Opened for write: "+fileName); + return File.OpenWrite(fileName); + } + + public static IEnumerable AllJpegFiles + => TestImages.Jpeg.All.Select(fn => new object[] {fn}); + + [Theory] + [MemberData(nameof(AllJpegFiles))] + public void OpenJpeg_SaveBmp(string jpegPath) + { + string bmpFileName = Path.GetFileNameWithoutExtension(jpegPath) + ".bmp"; + + using (var inputStream = File.OpenRead(jpegPath)) + { + var image = new Image(inputStream); + + using (var outputStream = CreateOutputStream(bmpFileName)) + { + image.Save(outputStream, new BmpFormat()); + } + } + } + + public static IEnumerable AllBmpFiles + => TestImages.Jpeg.All.Select(fn => new object[] {fn}); + + [Theory] + [MemberData(nameof(AllBmpFiles))] + public void OpenBmp_SaveJpeg(string bmpPath) + { + string jpegPath = Path.GetFileNameWithoutExtension(bmpPath) + ".jpeg"; + + using (var inputStream = File.OpenRead(bmpPath)) + { + var image = new Image(inputStream); + + using (var outputStream = CreateOutputStream(jpegPath)) + { + image.Save(outputStream, new JpegFormat()); + } + } + } + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Tests46/Formats/Png/PngTests.cs b/tests/ImageSharp.Tests46/Formats/Png/PngTests.cs index 165a32ec9..3c37ce898 100644 --- a/tests/ImageSharp.Tests46/Formats/Png/PngTests.cs +++ b/tests/ImageSharp.Tests46/Formats/Png/PngTests.cs @@ -3,6 +3,8 @@ // Licensed under the Apache License, Version 2.0. // +using ImageSharp.Formats; + namespace ImageSharp.Tests { using System.IO; diff --git a/tests/ImageSharp.Tests46/ImageSharp.Tests46.csproj b/tests/ImageSharp.Tests46/ImageSharp.Tests46.csproj index a1b4e990f..ab80f3d93 100644 --- a/tests/ImageSharp.Tests46/ImageSharp.Tests46.csproj +++ b/tests/ImageSharp.Tests46/ImageSharp.Tests46.csproj @@ -71,14 +71,15 @@ + + - diff --git a/tests/ImageSharp.Tests46/JpegSandbox.cs b/tests/ImageSharp.Tests46/JpegSandbox.cs deleted file mode 100644 index d6e5bff4b..000000000 --- a/tests/ImageSharp.Tests46/JpegSandbox.cs +++ /dev/null @@ -1,77 +0,0 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Numerics; -using ImageSharp.Formats; -using Xunit; -using Xunit.Abstractions; - -namespace ImageSharp.Tests -{ - public class JpegSandbox - { - - public const string SandboxOutputDirectory = "_SandboxOutput"; - - private ITestOutputHelper Output { get; } - - public JpegSandbox(ITestOutputHelper output) - { - Output = output; - } - - protected string CreateTestOutputFile(string fileName) - { - if (!Directory.Exists(SandboxOutputDirectory)) - { - Directory.CreateDirectory(SandboxOutputDirectory); - } - - string id = Guid.NewGuid().ToString().Substring(0, 4); - - string ext = Path.GetExtension(fileName); - fileName = Path.GetFileNameWithoutExtension(fileName); - - return $"{SandboxOutputDirectory}/{fileName}_{id}{ext}"; - } - - protected Stream CreateOutputStream(string fileName) - { - fileName = CreateTestOutputFile(fileName); - Output?.WriteLine("Opened for write: "+fileName); - return File.OpenWrite(fileName); - } - - //public static string[][] AllJpegFiles = new[] - //{ - // TestImages.Jpeg.All - //}; - - public static IEnumerable AllJpegFiles => TestImages.Jpeg.All.Select(fn => new object[] {fn}); - - [Theory] - [MemberData(nameof(AllJpegFiles))] - public void OpenJpeg_SaveBmp(string jpegFileName) - { - var image = new TestFile(jpegFileName).CreateImage(); - string bmpFileName = Path.GetFileNameWithoutExtension(jpegFileName) + ".bmp"; - - using (var stream = CreateOutputStream(bmpFileName)) - { - image.Save(stream, new BmpFormat()); - } - } - - [Fact] - public void Boo() - { - Vector hej = new Vector(); - - - - Output.WriteLine(Vector.Count.ToString()); - } - - } -} \ No newline at end of file diff --git a/tests/ImageSharp.Tests46/TestImages.cs b/tests/ImageSharp.Tests46/TestImages.cs index 1c9d61064..edb2449e8 100644 --- a/tests/ImageSharp.Tests46/TestImages.cs +++ b/tests/ImageSharp.Tests46/TestImages.cs @@ -3,6 +3,8 @@ // Licensed under the Apache License, Version 2.0. // +using System.Diagnostics.PerformanceData; + namespace ImageSharp.Tests { /// @@ -33,9 +35,11 @@ namespace ImageSharp.Tests public const string Progress = Folder + "progress.jpg"; public const string GammaDalaiLamaGray = Folder + "gamma_dalai_lama_gray.jpg"; + public const string Geneserath = Folder + "geneserath.jpg"; + public static readonly string[] All = new[] { - Cmyk, Exif, Floorplan, Calliphora, Turtle, Fb, Progress, GammaDalaiLamaGray + Cmyk, Exif, Floorplan, Calliphora, Turtle, Fb, Progress, GammaDalaiLamaGray, Geneserath }; } @@ -44,10 +48,10 @@ namespace ImageSharp.Tests private static readonly string folder = "../../TestImages/Formats/Bmp/"; public static string Car => folder + "Car.bmp"; - public static string F => folder + "F.bmp"; - public static string NegHeight => folder + "neg_height.bmp"; + + public static string[] All => new[] {Car, F, NegHeight}; } public static class Gif diff --git a/tests/ImageSharp.Tests46/TestImages/Formats/Jpg/geneserath.jpg b/tests/ImageSharp.Tests46/TestImages/Formats/Jpg/geneserath.jpg new file mode 100644 index 000000000..af0c04a1c --- /dev/null +++ b/tests/ImageSharp.Tests46/TestImages/Formats/Jpg/geneserath.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71e5b1ba181a5b17ba9814c2025650de592efabf4062bd77baa3c8e774df007 +size 223841