diff --git a/src/ImageSharp46/Common/Extensions/ComparableExtensions.cs b/src/ImageSharp46/Common/Extensions/ComparableExtensions.cs
index 8f056ff9d..6cc2eb588 100644
--- a/src/ImageSharp46/Common/Extensions/ComparableExtensions.cs
+++ b/src/ImageSharp46/Common/Extensions/ComparableExtensions.cs
@@ -3,6 +3,8 @@
// Licensed under the Apache License, Version 2.0.
//
+using System.Runtime.CompilerServices;
+
namespace ImageSharp
{
using System;
@@ -94,6 +96,8 @@ namespace ImageSharp
///
/// The representing the clamped value.
///
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Clamp(this float value, float min, float max)
{
if (value > max)
diff --git a/src/ImageSharp46/Formats/Jpg/Components/Block.cs b/src/ImageSharp46/Formats/Jpg/Components/Block.cs
index ca9d15883..f41e615b5 100644
--- a/src/ImageSharp46/Formats/Jpg/Components/Block.cs
+++ b/src/ImageSharp46/Formats/Jpg/Components/Block.cs
@@ -5,13 +5,14 @@
using System;
using System.Buffers;
+using System.Runtime.CompilerServices;
namespace ImageSharp.Formats
{
///
/// Represents an 8x8 block of coefficients to transform and encode.
///
- internal struct Block : IDisposable
+ public struct Block : IDisposable
{
private static ArrayPool IntArrayPool = ArrayPool.Create(BlockSize, 50);
@@ -67,7 +68,9 @@ namespace ImageSharp.Formats
///
public int this[int index]
{
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
get { return this.Data[index]; }
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
set { this.Data[index] = value; }
}
@@ -98,5 +101,11 @@ namespace ImageSharp.Formats
}
}
+ public Block Clone() // copies all BlockSize coefficients into a block obtained from Create()
+ {
+     Block copy = Create();
+     Array.Copy(this.Data, copy.Data, BlockSize);
+     return copy;
+ }
}
}
diff --git a/src/ImageSharp46/Formats/Jpg/Components/FloatIDCT.cs b/src/ImageSharp46/Formats/Jpg/Components/FloatIDCT.cs
new file mode 100644
index 000000000..5f2bd68b9
--- /dev/null
+++ b/src/ImageSharp46/Formats/Jpg/Components/FloatIDCT.cs
@@ -0,0 +1,208 @@
+using System;
+using System.Buffers;
+
+namespace ImageSharp.Formats
+{
+ internal class FloatIDCT
+ {
+ //private float[] _temp = new float[64];
+
+ // Cosine matrix and transposed cosine matrix
+ private static readonly float[] c = buildC();
+ private static readonly float[] cT = buildCT();
+
+ internal FloatIDCT()
+ {
+#if DYNAMIC_IDCT
+ dynamicIDCT = dynamicIDCT ?? EmitIDCT(); // build the emitted IDCT delegate once and cache it in the static field
+#endif
+ }
+
+ ///
+ /// Precomputes cosine terms in A.3.3 of
+ /// http://www.w3.org/Graphics/JPEG/itu-t81.pdf
+ ///
+ /// Closely follows the term precomputation in the
+ /// Java Advanced Imaging library.
+ ///
+ private static float[] buildC()
+ {
+     // Row-major 8x8 table: entry [u*8 + x] is the weight of frequency u at sample position x.
+     float[] table = new float[64];
+     for (int u = 0; u < 8; u++) // u: frequency index (u or v)
+     {
+         int rowStart = u*8;
+         for (int x = 0; x < 8; x++) // x: sample index (x or y)
+         {
+             if (u == 0) { table[rowStart + x] = 0.353553391f; } // 1 / SQRT(8)
+             else { table[rowStart + x] = (float)(0.5 * Math.Cos(((2.0 * x + 1) * u * Math.PI) / 16.0)); }
+         }
+     }
+
+     return table;
+ }
+ private static float[] buildCT()
+ {
+     float[] transposed = new float[64]; // transposed[col*8 + row] = c[row*8 + col]
+     for (int n = 0; n < 64; n++)
+     {
+         transposed[(n % 8) * 8 + (n / 8)] = c[n];
+     }
+     return transposed;
+ }
+
+ public static void SetValueClipped(byte[,] arr, int i, int j, float val)
+ {
+     // Saturate to the byte range first; in-range values are rounded by truncating val + 0.5.
+     if (val < 0) { arr[i, j] = 0; }
+     else if (val > 255) { arr[i, j] = 255; }
+     else { arr[i, j] = (byte)(val + 0.5); }
+ }
+
+ public static void Transform(ref Block block) { FastIDCT(block.Data); } // runs the float IDCT in place over the block's coefficient array
+
+ /// See figure A.3.3 IDCT (informative) on A-5.
+ /// http://www.w3.org/Graphics/JPEG/itu-t81.pdf
+ public static void FastIDCT(int[] output)
+ {
+     // Scratch buffers: 'coeffs' holds the input converted to float, 'rowPass' the first matrix product.
+     float[] rowPass = ArrayPool.Shared.Rent(64);
+     float[] coeffs = ArrayPool.Shared.Rent(64);
+
+     // Copy the integer coefficients into the float working buffer.
+     for (int n = 0; n < output.Length; n++)
+     {
+         coeffs[n] = output[n];
+     }
+
+     // First pass: rowPass = coeffs x c (8x8 matrix product, row-major).
+     int write = 0;
+     for (int row = 0; row < 8; row++)
+     {
+         int rowStart = row * 8;
+         for (int col = 0; col < 8; col++)
+         {
+             float sum = 0;
+             for (int k = 0; k < 8; k++)
+             {
+                 sum += coeffs[rowStart + k] * c[k*8 + col];
+             }
+
+             rowPass[write++] = sum;
+         }
+     }
+
+     // Second pass: output = cT x rowPass, shifted by +128 and saturated to 0..255.
+     for (int row = 0; row < 8; row++)
+     {
+         int rowStart = row*8;
+         for (int col = 0; col < 8; col++)
+         {
+             float sum = 128f;
+             for (int k = 0; k < 8; k++)
+             {
+                 sum += cT[row*8 + k] * rowPass[k * 8 + col];
+             }
+
+             int cell = rowStart + col;
+             if (sum < 0) { output[cell] = 0; }
+             else if (sum > 255) { output[cell] = 255; }
+             else { output[cell] = (int)(sum + 0.5); } // Implements rounding
+         }
+     }
+
+     // Hand both scratch buffers back to the shared pool, asking it to clear them.
+     ArrayPool.Shared.Return(coeffs, true);
+     ArrayPool.Shared.Return(rowPass, true);
+ }
+
+
+
+#if DYNAMIC_IDCT
+
+/// <summary>
+/// Emits a fully unrolled, branch-free IL method that performs the inverse DCT; results are stored through SetValueClipped.
+/// NOTE(review): c[k, j], cT[i, k] and typeof(DCT) below do not match this class, where c and cT are float[] -
+/// confirm and update before defining DYNAMIC_IDCT.
+/// </summary>
+/// <returns>A delegate to the DynamicMethod</returns>
+ private static IDCTFunc EmitIDCT()
+ {
+ Type[] args = { typeof(float[]), typeof(float[]), typeof(byte[,]) };
+
+ DynamicMethod idctMethod = new DynamicMethod("dynamicIDCT",
+ null, // no return type
+ args); // input arrays
+
+ ILGenerator il = idctMethod.GetILGenerator();
+
+ int idx = 0;
+
+ for (int i = 0; i < 8; i++)
+ {
+ for (int j = 0; j < 8; j++)
+ {
+ il.Emit(OpCodes.Ldarg_1); // 1 {temp}
+ il.Emit(OpCodes.Ldc_I4_S, (short)idx++); // 3 {temp, idx}
+
+ for (int k = 0; k < 8; k++)
+ {
+ il.Emit(OpCodes.Ldarg_0); // {in}
+ il.Emit(OpCodes.Ldc_I4_S, (short)(i * 8 + k)); // {in,idx}
+ il.Emit(OpCodes.Ldelem_R4); // {in[idx]}
+ il.Emit(OpCodes.Ldc_R4, c[k, j]); // {in[idx],c[k,j]}
+ il.Emit(OpCodes.Mul); // {in[idx]*c[k,j]}
+ if (k != 0) il.Emit(OpCodes.Add); // the first product has no running sum to be added to yet
+ }
+
+ il.Emit(OpCodes.Stelem_R4); // {}
+ }
+ }
+
+ var meth = typeof(DCT).GetMethod("SetValueClipped",
+ BindingFlags.Static | BindingFlags.Public, null,
+ CallingConventions.Standard,
+ new Type[] {
+ typeof(byte[,]), // arr
+ typeof(int), // i
+ typeof(int), // j
+ typeof(float) } // val
+ , null);
+
+ for (int i = 0; i < 8; i++)
+ {
+ for (int j = 0; j < 8; j++)
+ {
+ il.Emit(OpCodes.Ldarg_2); // {output}
+ il.Emit(OpCodes.Ldc_I4_S, (short)i); // {output,i}
+ il.Emit(OpCodes.Ldc_I4_S, (short)j); // X={output,i,j}
+
+ il.Emit(OpCodes.Ldc_R4, 128.0f); // {X,128.0f}
+
+ for (int k = 0; k < 8; k++)
+ {
+ il.Emit(OpCodes.Ldarg_1); // {X,temp}
+ il.Emit(OpCodes.Ldc_I4_S,
+ (short)(k * 8 + j)); // {X,temp,idx}
+ il.Emit(OpCodes.Ldelem_R4); // {X,temp[idx]}
+ il.Emit(OpCodes.Ldc_R4, cT[i, k]); // {X,temp[idx],cT[i,k]}
+ il.Emit(OpCodes.Mul); // {X,temp[idx]*cT[i,k]}
+ il.Emit(OpCodes.Add); // {X,running sum}
+ }
+
+ il.EmitCall(OpCodes.Call, meth, null); // SetValueClipped(output, i, j, running sum)
+ }
+ }
+
+ il.Emit(OpCodes.Ret);
+
+ return (IDCTFunc)idctMethod.CreateDelegate(typeof(IDCTFunc));
+ }
+
+ private delegate void IDCTFunc(float[] input, float[] temp, byte[,] output);
+ private static IDCTFunc dynamicIDCT = null;
+#endif
+
+
+ }
+}
\ No newline at end of file
diff --git a/src/ImageSharp46/Formats/Jpg/Components/Huffman.cs b/src/ImageSharp46/Formats/Jpg/Components/Huffman.cs
index cef99ea1a..345b45e0e 100644
--- a/src/ImageSharp46/Formats/Jpg/Components/Huffman.cs
+++ b/src/ImageSharp46/Formats/Jpg/Components/Huffman.cs
@@ -3,36 +3,41 @@
// Licensed under the Apache License, Version 2.0.
//
+using System;
+using System.Buffers;
+
namespace ImageSharp.Formats
{
///
/// Represents a Huffman tree
///
- internal struct Huffman
+ internal struct Huffman : IDisposable
{
- ///
- /// Initializes a new instance of the class.
- ///
- /// The log-2 size of the Huffman decoder's look-up table.
- /// The maximum (inclusive) number of codes in a Huffman tree.
- /// The maximum (inclusive) number of bits in a Huffman code.
- //public Huffman(int lutSize, int maxNCodes, int maxCodeLength)
- //{
- // this.Lut = new ushort[1 << lutSize];
- // this.Values = new byte[maxNCodes];
- // this.MinCodes = new int[maxCodeLength];
- // this.MaxCodes = new int[maxCodeLength];
- // this.Indices = new int[maxCodeLength];
- // this.Length = 0;
- //}
+ private static ArrayPool UshortBuffer =
+ //ArrayPool.Shared;
+ ArrayPool.Create(1 << JpegDecoderCore.LutSize, 50);
+
+ private static ArrayPool ByteBuffer =
+ //ArrayPool.Shared;
+ ArrayPool.Create(JpegDecoderCore.MaxNCodes, 50);
+
+ private static readonly ArrayPool IntBuffer =
+ //ArrayPool.Shared;
+ ArrayPool.Create(JpegDecoderCore.MaxCodeLength, 50);
public void Init(int lutSize, int maxNCodes, int maxCodeLength)
{
- this.Lut = new ushort[1 << lutSize];
- this.Values = new byte[maxNCodes];
- this.MinCodes = new int[maxCodeLength];
- this.MaxCodes = new int[maxCodeLength];
- this.Indices = new int[maxCodeLength];
+ //this.Lut = new ushort[1 << lutSize];
+ //this.Values = new byte[maxNCodes];
+ //this.MinCodes = new int[maxCodeLength];
+ //this.MaxCodes = new int[maxCodeLength];
+ //this.Indices = new int[maxCodeLength];
+
+ this.Lut = UshortBuffer.Rent(1 << lutSize);
+ this.Values = ByteBuffer.Rent(maxNCodes);
+ this.MinCodes = IntBuffer.Rent(maxCodeLength);
+ this.MaxCodes = IntBuffer.Rent(maxCodeLength); ;
+ this.Indices = IntBuffer.Rent(maxCodeLength); ;
}
///
@@ -69,6 +74,15 @@ namespace ImageSharp.Formats
/// Gets the array of indices. Indices[i] is the index into Values of MinCodes[i].
///
public int[] Indices;
+
+ public void Dispose() // returns each rented table to its pool once; a no-op for tables never rented or already returned
+ {
+     if (this.Lut != null) { UshortBuffer.Return(this.Lut, true); this.Lut = null; }
+     if (this.Values != null) { ByteBuffer.Return(this.Values, true); this.Values = null; }
+     if (this.MinCodes != null) { IntBuffer.Return(this.MinCodes, true); this.MinCodes = null; }
+     if (this.MaxCodes != null) { IntBuffer.Return(this.MaxCodes, true); this.MaxCodes = null; }
+     if (this.Indices != null) { IntBuffer.Return(this.Indices, true); this.Indices = null; }
+ }
}
diff --git a/src/ImageSharp46/Formats/Jpg/Components/IDCT.cs b/src/ImageSharp46/Formats/Jpg/Components/IDCT.cs
index 4ca4ba903..88b493c4b 100644
--- a/src/ImageSharp46/Formats/Jpg/Components/IDCT.cs
+++ b/src/ImageSharp46/Formats/Jpg/Components/IDCT.cs
@@ -167,5 +167,6 @@ namespace ImageSharp.Formats
src[56 + x] = (y7 - y1) >> 14;
}
}
+
}
}
diff --git a/src/ImageSharp46/Formats/Jpg/Components/MagicDCT.cs b/src/ImageSharp46/Formats/Jpg/Components/MagicDCT.cs
new file mode 100644
index 000000000..6c71ec92e
--- /dev/null
+++ b/src/ImageSharp46/Formats/Jpg/Components/MagicDCT.cs
@@ -0,0 +1,572 @@
+using System;
+using System.Buffers;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+
+namespace ImageSharp.Formats
+{
+ public struct Span
+ where T : struct
+ {
+     // Pool behind RentFromPool / ReturnToPool.
+     private static readonly ArrayPool Pool = ArrayPool.Create(128, 10);
+
+     // Backing array; element i of the view is Data[i + Offset].
+     public T[] Data;
+     public int Offset;
+
+     // Allocates a fresh backing array of 'size' elements.
+     public Span(int size, int offset = 0)
+     {
+         this.Data = new T[size];
+         this.Offset = offset;
+     }
+
+     // Wraps an existing array without copying it.
+     public Span(T[] data, int offset = 0)
+     {
+         this.Data = data;
+         this.Offset = offset;
+     }
+
+     public static implicit operator Span(T[] data) => new Span(data, 0);
+
+     // Element access relative to Offset; bounds checking is that of the backing array.
+     public T this[int idx]
+     {
+         [MethodImpl(MethodImplOptions.AggressiveInlining)] get { return this.Data[idx + this.Offset]; }
+         [MethodImpl(MethodImplOptions.AggressiveInlining)] set { this.Data[idx + this.Offset] = value; }
+     }
+
+     // Returns a view over the same array, shifted forward by 'offset' elements.
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public Span Slice(int offset) => new Span(this.Data, this.Offset + offset);
+
+     // Shifts this view in place; only this copy of the struct is affected.
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public void AddOffset(int offset) => this.Offset += offset;
+
+     // Wraps an array rented from the pool.
+     public static Span RentFromPool(int size, int offset = 0) => new Span(Pool.Rent(size), offset);
+
+     // Hands the backing array back to the pool (requesting a clear) and drops the reference.
+     public void ReturnToPool()
+     {
+         Pool.Return(this.Data, true);
+         this.Data = null;
+     }
+ }
+
+ public static class MagicDCT
+ {
+ private static readonly ArrayPool FloatArrayPool = ArrayPool.Create(Block.BlockSize, 50);
+
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Matrix4x4 Load(Span src, int x, int y)
+ {
+     // Gathers the 4x4 tile whose top-left element sits at column x, row y of a row-major 8x8 buffer.
+     int row0 = y*8 + x;
+     int row1 = row0 + 8;
+     int row2 = row1 + 8;
+     int row3 = row2 + 8;
+
+     Matrix4x4 tile = new Matrix4x4(
+         src[row0], src[row0 + 1], src[row0 + 2], src[row0 + 3],
+         src[row1], src[row1 + 1], src[row1 + 2], src[row1 + 3],
+         src[row2], src[row2 + 1], src[row2 + 2], src[row2 + 3],
+         src[row3], src[row3 + 1], src[row3 + 2], src[row3 + 3]
+     );
+
+     return tile;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void Store(Matrix4x4 s, Span d, int x, int y)
+ {
+     // Scatters the 4x4 matrix into a row-major 8x8 buffer with its top-left element at column x, row y.
+     int row0 = y*8 + x;
+     int row1 = row0 + 8;
+     int row2 = row1 + 8;
+     int row3 = row2 + 8;
+     d[row0] = s.M11;
+     d[row0 + 1] = s.M12;
+     d[row0 + 2] = s.M13;
+     d[row0 + 3] = s.M14;
+
+     d[row1] = s.M21;
+     d[row1 + 1] = s.M22;
+     d[row1 + 2] = s.M23;
+     d[row1 + 3] = s.M24;
+
+     d[row2] = s.M31;
+     d[row2 + 1] = s.M32;
+     d[row2 + 2] = s.M33;
+     d[row2 + 3] = s.M34;
+
+     d[row3] = s.M41;
+     d[row3 + 1] = s.M42;
+     d[row3 + 2] = s.M43;
+     d[row3 + 3] = s.M44;
+ }
+
+ /// <summary>
+ /// Transposes a row-major 8x8 buffer in place using four Matrix4x4 tile transposes.
+ /// </summary>
+ /// <param name="data">View over at least 64 floats; elements 0..63 are read and rewritten.</param>
+ /// <remarks>
+ /// Delegates to the two-buffer overload with the same view as source and destination.
+ /// </remarks>
+ public static void Transpose8x8_SSE_Slow(Span data)
+ {
+     // The two-buffer overload reads all four tiles before it writes any of them,
+     // so handing it the same view twice is safe.
+     Transpose8x8_SSE_Slow(data, data);
+ }
+
+ /// <summary>
+ /// Writes the transpose of the row-major 8x8 buffer in src into dest, one 4x4 tile at a time.
+ /// </summary>
+ /// <param name="src">View over at least 64 floats to read from.</param>
+ /// <param name="dest">View over at least 64 floats to write to.</param>
+ public static void Transpose8x8_SSE_Slow(Span src, Span dest)
+ {
+     // Load and transpose each quadrant; Load takes (x, y) = (column, row) of the tile's top-left corner.
+     Matrix4x4 topLeft = Matrix4x4.Transpose(Load(src, 0, 0));
+     Matrix4x4 topRight = Matrix4x4.Transpose(Load(src, 4, 0));
+     Matrix4x4 bottomLeft = Matrix4x4.Transpose(Load(src, 0, 4));
+     Matrix4x4 bottomRight = Matrix4x4.Transpose(Load(src, 4, 4));
+
+     // Diagonal tiles keep their position; the two off-diagonal tiles swap places.
+     Store(topLeft, dest, 0, 0);
+     Store(bottomLeft, dest, 4, 0);
+     Store(topRight, dest, 0, 4);
+     Store(bottomRight, dest, 4, 4);
+
+     // dest now holds the full 8x8 transpose of src.
+ }
+
+ public static void Transpose8x8(Span data)
+ {
+     // In-place transpose: swap every element above the diagonal with its mirror image below it.
+     for (int row = 0; row < 7; row++)
+     {
+         for (int col = row + 1; col < 8; col++)
+         {
+             float upper = data[row*8 + col];
+             data[row*8 + col] = data[col*8 + row];
+             data[col*8 + row] = upper;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Writes the transpose of the row-major 8x8 matrix in src into dest.
+ /// </summary>
+ /// <param name="src">Source view; elements 0..63 are read.</param>
+ /// <param name="dest">Destination view; elements 0..63 are written.</param>
+ /// <remarks>
+ /// Elements are copied one by one with no temporary, so src and dest are expected to be
+ /// backed by different storage; the single-argument overload transposes in place.
+ /// Transpose8x8_SSE_Slow(src, dest) is the Matrix4x4-based counterpart of this method.
+ /// </remarks>
+ public static void Transpose8x8(Span src, Span dest)
+ {
+     // Visit src in row-major order and drop each element at its mirrored position.
+     for (int row = 0; row < 8; row++)
+     {
+         int rowStart = row*8;
+
+         for (int col = 0; col < 8; col++)
+         {
+             int mirrored = col*8 + row;
+
+             // src[row, col] becomes dest[col, row].
+             dest[mirrored] = src[rowStart + col];
+         }
+     }
+ }
+
+ public static void iDCT1Dllm_32f(Span y, Span x) // 1-D 8-point inverse DCT: reads y[0..7], writes x[0..7] (unscaled)
+ {
+ float a0, a1, a2, a3, b0, b1, b2, b3;
+ float z0, z1, z2, z3, z4;
+
+ float r0 = 1.414214f; // r0..r7 are sqrt(2) * cos(k * pi / 16) for k = 0..7; r0 and r4 are not referenced below
+ float r1 = 1.387040f;
+ float r2 = 1.306563f;
+ float r3 = 1.175876f;
+ float r4 = 1.000000f;
+ float r5 = 0.785695f;
+ float r6 = 0.541196f;
+ float r7 = 0.275899f;
+
+ z0 = y[1] + y[7]; // odd-indexed inputs (1, 3, 5, 7) feed b0..b3
+ z1 = y[3] + y[5];
+ z2 = y[3] + y[7];
+ z3 = y[1] + y[5];
+ z4 = (z0 + z1)*r3;
+
+ z0 = z0*(-r3 + r7);
+ z1 = z1*(-r3 - r1);
+ z2 = z2*(-r3 - r5) + z4;
+ z3 = z3*(-r3 + r5) + z4;
+
+ b3 = y[7]*(-r1 + r3 + r5 - r7) + z0 + z2;
+ b2 = y[5]*(r1 + r3 - r5 + r7) + z1 + z3;
+ b1 = y[3]*(r1 + r3 + r5 - r7) + z1 + z2;
+ b0 = y[1]*(r1 + r3 - r5 - r7) + z0 + z3;
+
+ z4 = (y[2] + y[6])*r6; // even-indexed inputs (0, 2, 4, 6) feed a0..a3; z0..z4 are reused from here on
+ z0 = y[0] + y[4];
+ z1 = y[0] - y[4];
+ z2 = z4 - y[6]*(r2 + r6);
+ z3 = z4 + y[2]*(r2 - r6);
+ a0 = z0 + z3;
+ a3 = z0 - z3;
+ a1 = z1 + z2;
+ a2 = z1 - z2;
+
+ x[0] = a0 + b0; // outputs k and 7 - k are the sum and difference of the same (a, b) pair
+ x[7] = a0 - b0;
+ x[1] = a1 + b1;
+ x[6] = a1 - b1;
+ x[2] = a2 + b2;
+ x[5] = a2 - b2;
+ x[3] = a3 + b3;
+ x[4] = a3 - b3;
+ }
+
+ public static void iDCT2D_llm(Span s, Span d, Span temp)
+ {
+     // Pass 1: 1-D IDCT of every 8-element row of s, written to the matching row of temp.
+     for (int rowStart = 0; rowStart < 64; rowStart += 8)
+     {
+         iDCT1Dllm_32f(s.Slice(rowStart), temp.Slice(rowStart));
+     }
+
+     // Transposing lets the same row routine process what were the columns.
+     Transpose8x8(temp, d);
+
+     for (int rowStart = 0; rowStart < 64; rowStart += 8)
+     {
+         iDCT1Dllm_32f(d.Slice(rowStart), temp.Slice(rowStart));
+     }
+
+     // Restore the original orientation, then apply the 1/8 scale to all 64 results.
+     Transpose8x8(temp, d);
+     for (int n = 0; n < 64; n++)
+     {
+         d[n] = d[n] * 0.125f;
+     }
+ }
+
+ // Inverse DCT of one 8x8 block, in place: the int coefficients are copied to a float buffer,
+ // transformed with iDCT8x8_llm_sse and rounded back into the block.
+ public static void IDCT(ref Block block)
+ {
+     Span src = Span.RentFromPool(64);
+     Span dest = Span.RentFromPool(64);
+     Span temp = Span.RentFromPool(64);
+     for (int i = 0; i < 64; i++)
+     {
+         src[i] = block[i];
+     }
+
+     iDCT8x8_llm_sse(src, dest, temp);
+
+     for (int i = 0; i < 64; i++)
+     {
+         // Floor(v + 0.5) rounds negative results to the nearest integer as well;
+         // a bare (int) cast truncates toward zero and would map e.g. -1.7 to -1 instead of -2.
+         block[i] = (int)Math.Floor(dest[i] + 0.5f);
+     }
+
+     src.ReturnToPool();
+     dest.ReturnToPool();
+     temp.ReturnToPool();
+ }
+
+ public static void iDCT8x8GT(Span s, Span d) // 2-D inverse DCT built from two idct81d_sse_GT passes; the result ends up in d
+ {
+ idct81d_sse_GT(s, d); // first dimension: s -> d
+
+ Transpose8x8(d); // swap rows and columns so the second pass works along the other dimension
+
+ idct81d_sse_GT(d, d); // second dimension, source and destination are the same view
+
+ Transpose8x8(d); // back to the original orientation
+ }
+
+ // Helpers named after the SSE intrinsics they stand in for; each moves four floats between a Span and a Vector4.
+ // Reads four consecutive floats starting 'offset' elements into src.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector4 _mm_load_ps(Span src, int offset)
+ {
+     return _mm_load_ps(src.Slice(offset));
+ }
+
+ // Reads the first four floats of src as (X, Y, Z, W).
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector4 _mm_load_ps(Span src)
+ {
+     return new Vector4(src[0], src[1], src[2], src[3]);
+ }
+
+ // Writes the four lanes of src starting 'offset' elements into dest.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void _mm_store_ps(Span dest, int offset, Vector4 src)
+ {
+     _mm_store_ps(dest.Slice(offset), src);
+ }
+
+ // Writes X, Y, Z, W to the first four floats of dest.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void _mm_store_ps(Span dest, Vector4 src)
+ {
+     dest[0] = src.X;
+     dest[1] = src.Y;
+     dest[2] = src.Z;
+     dest[3] = src.W;
+ }
+
+
+ // Inverse 8-point DCT down the columns of a row-major 8x8 buffer, four adjacent columns per pass (two passes).
+ public static void idct81d_sse_GT(Span src, Span dst)
+ {
+     Vector4 c1414 = new Vector4(1.4142135623731f);
+     Vector4 c0250 = new Vector4(0.25f);
+     Vector4 c0353 = new Vector4(0.353553390593274f);
+     Vector4 c0707 = new Vector4(0.707106781186547f);
+     for (int i = 0; i < 2; i++)
+     {
+         Vector4 ms0 = _mm_load_ps(src, 0);
+         Vector4 ms1 = _mm_load_ps(src, 8);
+         Vector4 ms2 = _mm_load_ps(src, 16);
+         Vector4 ms3 = _mm_load_ps(src, 24);
+         Vector4 ms4 = _mm_load_ps(src, 32);
+         Vector4 ms5 = _mm_load_ps(src, 40);
+         Vector4 ms6 = _mm_load_ps(src, 48);
+         Vector4 ms7 = _mm_load_ps(src, 56);
+
+         Vector4 mx00 = (c1414*ms0);
+
+         Vector4 mx01 = ((new Vector4(1.38703984532215f)*ms1) + (new Vector4(0.275899379282943f)*ms7));
+         Vector4 mx02 = ((new Vector4(1.30656296487638f)*ms2) + (new Vector4(0.541196100146197f)*ms6));
+         Vector4 mx03 = ((new Vector4(1.17587560241936f)*ms3) + (new Vector4(0.785694958387102f)*ms5));
+
+         Vector4 mx04 = (c1414*ms4);
+
+         Vector4 mx05 = ((new Vector4(-0.785694958387102f)*ms3) + (new Vector4(+1.17587560241936f)*ms5));
+         Vector4 mx06 = ((new Vector4(0.541196100146197f)*ms2) + (new Vector4(-1.30656296487638f)*ms6));
+         Vector4 mx07 = ((new Vector4(-0.275899379282943f)*ms1) + (new Vector4(1.38703984532215f)*ms7));
+         Vector4 mx09 = (mx00 + mx04);
+         Vector4 mx0a = (mx01 + mx03);
+
+         Vector4 mx0b = (c1414*mx02);
+
+         Vector4 mx0c = (mx00 - mx04);
+         Vector4 mx0d = (mx01 - mx03);
+         // mx0f is the sum and mx10 the difference of mx0c and mx0d; rows 1/2 and rows 5/6 need opposite signs of mx0d.
+         Vector4 mx0e = (c0353*(mx09 - mx0b));
+         Vector4 mx0f = (c0353*(mx0c + mx0d));
+         Vector4 mx10 = (c0353*(mx0c - mx0d));
+         Vector4 mx11 = (c1414*mx06);
+
+         Vector4 mx12 = (mx05 + mx07);
+
+         Vector4 mx13 = (mx05 - mx07);
+
+         Vector4 mx14 = (c0353*(mx11 + mx12));
+         Vector4 mx15 = (c0353*(mx11 - mx12));
+         Vector4 mx16 = (new Vector4(0.5f)*mx13);
+         // Output rows 0 and 7 are the sum and the difference of the same two terms.
+         _mm_store_ps(dst, 0, ((c0250*(mx09 + mx0b)) + (c0353*mx0a)));
+         _mm_store_ps(dst, 8, (c0707*(mx0f + mx15)));
+         _mm_store_ps(dst, 16, (c0707*(mx0f - mx15)));
+         _mm_store_ps(dst, 24, (c0707*(mx0e + mx16)));
+         _mm_store_ps(dst, 32, (c0707*(mx0e - mx16)));
+         _mm_store_ps(dst, 40, (c0707*(mx10 - mx14)));
+         _mm_store_ps(dst, 48, (c0707*(mx10 + mx14)));
+
+         _mm_store_ps(dst, 56, ((c0250*(mx09 + mx0b)) - (c0353*mx0a)));
+
+         dst = dst.Slice(4);
+         src = src.Slice(4);
+     }
+ }
+
+ private static readonly Vector4 _1_175876 = new Vector4(1.175876f);
+ private static readonly Vector4 _1_961571 = new Vector4(-1.961571f);
+ private static readonly Vector4 _0_390181 = new Vector4(-0.390181f);
+ private static readonly Vector4 _0_899976 = new Vector4(-0.899976f);
+ private static readonly Vector4 _2_562915 = new Vector4(-2.562915f);
+ private static readonly Vector4 _0_298631 = new Vector4(0.298631f);
+ private static readonly Vector4 _2_053120 = new Vector4(2.053120f);
+ private static readonly Vector4 _3_072711 = new Vector4(3.072711f);
+ private static readonly Vector4 _1_501321 = new Vector4(1.501321f);
+ private static readonly Vector4 _0_541196 = new Vector4(0.541196f);
+ private static readonly Vector4 _1_847759 = new Vector4(-1.847759f);
+ private static readonly Vector4 _0_765367 = new Vector4(0.765367f);
+
+ public static void iDCT2D8x4_32f(Span y, Span x)
+ {
+ /* Scalar reference for this method; here each Vector4 lane carries one of four adjacent columns:
+ float a0,a1,a2,a3,b0,b1,b2,b3; float z0,z1,z2,z3,z4; float r[8]; int i;
+ for(i = 0;i < 8;i++){ r[i] = (float)(cos((double)i / 16.0 * M_PI) * M_SQRT2); }
+ */
+ /*
+ 0: 1.414214
+ 1: 1.387040
+ 2: 1.306563
+ 3: 1.175876
+ 4: 1.000000
+ 5: 0.785695
+ 6: 0.541196
+ 7: 0.275899
+ */
+ Vector4 my1 = _mm_load_ps(y, 8); // my<k> is read from offset k*8, i.e. four values of row k
+ Vector4 my7 = _mm_load_ps(y, 56);
+ Vector4 mz0 = my1 + my7;
+
+ Vector4 my3 = _mm_load_ps(y, 24);
+ Vector4 mz2 = my3 + my7;
+ Vector4 my5 = _mm_load_ps(y, 40);
+ Vector4 mz1 = my3 + my5;
+ Vector4 mz3 = my1 + my5;
+
+ Vector4 mz4 = ((mz0 + mz1)* _1_175876);
+ //z0 = y[1] + y[7]; z1 = y[3] + y[5]; z2 = y[3] + y[7]; z3 = y[1] + y[5];
+ //z4 = (z0 + z1) * r[3];
+
+ mz2 = mz2* _1_961571 + mz4; // _1_961571, _0_390181, _0_899976 and _2_562915 are initialised with negative values
+ mz3 = mz3* _0_390181 + mz4;
+ mz0 = mz0* _0_899976;
+ mz1 = mz1* _2_562915;
+
+ /*
+ -0.899976
+ -2.562915
+ -1.961571
+ -0.390181
+ z0 = z0 * (-r[3] + r[7]);
+ z1 = z1 * (-r[3] - r[1]);
+ z2 = z2 * (-r[3] - r[5]) + z4;
+ z3 = z3 * (-r[3] + r[5]) + z4;*/
+
+
+ Vector4 mb3 = my7* _0_298631 + mz0 + mz2;
+ Vector4 mb2 = my5* _2_053120 + mz1 + mz3;
+ Vector4 mb1 = my3* _3_072711 + mz1 + mz2;
+ Vector4 mb0 = my1* _1_501321 + mz0 + mz3;
+
+ /*
+ 0.298631
+ 2.053120
+ 3.072711
+ 1.501321
+ b3 = y[7] * (-r[1] + r[3] + r[5] - r[7]) + z0 + z2;
+ b2 = y[5] * ( r[1] + r[3] - r[5] + r[7]) + z1 + z3;
+ b1 = y[3] * ( r[1] + r[3] + r[5] - r[7]) + z1 + z2;
+ b0 = y[1] * ( r[1] + r[3] - r[5] - r[7]) + z0 + z3;
+ */
+
+ Vector4 my2 = _mm_load_ps(y, 16);
+ Vector4 my6 = _mm_load_ps(y, 48);
+ mz4 = (my2 + my6)* _0_541196;
+ Vector4 my0 = _mm_load_ps(y, 0);
+ Vector4 my4 = _mm_load_ps(y, 32);
+ mz0 = my0 + my4;
+ mz1 = my0 - my4;
+
+ mz2 = mz4 + my6* _1_847759; // _1_847759 is initialised with -1.847759, matching z4 - y[6] * (r[2] + r[6])
+ mz3 = mz4 + my2* _0_765367;
+
+ my0 = mz0 + mz3; // my0..my3 are reused from here on to hold a0, a1, a2, a3 of the reference code
+ my3 = mz0 - mz3;
+ my1 = mz1 + mz2;
+ my2 = mz1 - mz2;
+ /*
+ 1.847759
+ 0.765367
+ z4 = (y[2] + y[6]) * r[6];
+ z0 = y[0] + y[4]; z1 = y[0] - y[4];
+ z2 = z4 - y[6] * (r[2] + r[6]);
+ z3 = z4 + y[2] * (r[2] - r[6]);
+ a0 = z0 + z3; a3 = z0 - z3;
+ a1 = z1 + z2; a2 = z1 - z2;
+ */
+
+ _mm_store_ps(x, 0, my0 + mb0);
+
+ _mm_store_ps(x, 56, my0 - mb0);
+
+ _mm_store_ps(x, 8, my1 + mb1);
+
+ _mm_store_ps(x, 48, my1 - mb1);
+
+ _mm_store_ps(x, 16, my2 + mb2);
+
+ _mm_store_ps(x, 40, my2 - mb2);
+
+ _mm_store_ps(x, 24, my3 + mb3);
+
+ _mm_store_ps(x, 32, my3 - mb3);
+ /*
+ x[0] = a0 + b0; x[7] = a0 - b0;
+ x[1] = a1 + b1; x[6] = a1 - b1;
+ x[2] = a2 + b2; x[5] = a2 - b2;
+ x[3] = a3 + b3; x[4] = a3 - b3;
+ for(i = 0;i < 8;i++){ x[i] *= 0.353554f; } (this scaling loop is not performed in this method)
+ */
+ }
+
+ public static void iDCT8x8_llm_sse(Span s, Span d, Span temp)
+ {
+     // First dimension: transpose into temp, then transform the left and the right 4-column half.
+     Transpose8x8(s, temp);
+     iDCT2D8x4_32f(temp, d);
+     iDCT2D8x4_32f(temp.Slice(4), d.Slice(4));
+
+     // Second dimension: the same again on the intermediate result.
+     Transpose8x8(d, temp);
+     iDCT2D8x4_32f(temp, d);
+     iDCT2D8x4_32f(temp.Slice(4), d.Slice(4));
+
+     // Final 1/8 scale, four floats at a time over all 64 outputs.
+     Vector4 scale = new Vector4(0.1250f);
+
+     _mm_store_ps(d, 0, _mm_load_ps(d, 0)*scale);
+
+     _mm_store_ps(d, 4, _mm_load_ps(d, 4)*scale);
+
+     _mm_store_ps(d, 8, _mm_load_ps(d, 8)*scale);
+
+     _mm_store_ps(d, 12, _mm_load_ps(d, 12)*scale);
+
+     _mm_store_ps(d, 16, _mm_load_ps(d, 16)*scale);
+
+     _mm_store_ps(d, 20, _mm_load_ps(d, 20)*scale);
+
+     _mm_store_ps(d, 24, _mm_load_ps(d, 24)*scale);
+
+     _mm_store_ps(d, 28, _mm_load_ps(d, 28)*scale);
+
+     _mm_store_ps(d, 32, _mm_load_ps(d, 32)*scale);
+
+     _mm_store_ps(d, 36, _mm_load_ps(d, 36)*scale);
+
+     _mm_store_ps(d, 40, _mm_load_ps(d, 40)*scale);
+
+     _mm_store_ps(d, 44, _mm_load_ps(d, 44)*scale);
+
+     _mm_store_ps(d, 48, _mm_load_ps(d, 48)*scale);
+
+     _mm_store_ps(d, 52, _mm_load_ps(d, 52)*scale);
+
+     _mm_store_ps(d, 56, _mm_load_ps(d, 56)*scale);
+
+     _mm_store_ps(d, 60, _mm_load_ps(d, 60)*scale);
+ }
+}
+}
\ No newline at end of file
diff --git a/src/ImageSharp46/Formats/Jpg/JpegDecoderCore.cs b/src/ImageSharp46/Formats/Jpg/JpegDecoderCore.cs
index e5191069a..2f0b22829 100644
--- a/src/ImageSharp46/Formats/Jpg/JpegDecoderCore.cs
+++ b/src/ImageSharp46/Formats/Jpg/JpegDecoderCore.cs
@@ -3,6 +3,8 @@
// Licensed under the Apache License, Version 2.0.
//
+using System.Runtime.CompilerServices;
+
namespace ImageSharp.Formats
{
using System;
@@ -17,17 +19,17 @@ namespace ImageSharp.Formats
///
/// The maximum (inclusive) number of bits in a Huffman code.
///
- private const int MaxCodeLength = 16;
+ internal const int MaxCodeLength = 16;
///
/// The maximum (inclusive) number of codes in a Huffman tree.
///
- private const int MaxNCodes = 256;
+ internal const int MaxNCodes = 256;
///
/// The log-2 size of the Huffman decoder's look-up table.
///
- private const int LutSize = 8;
+ internal const int LutSize = 8;
///
/// The maximum number of color components
@@ -1401,7 +1403,7 @@ namespace ImageSharp.Formats
byte cr = this.ycbcrImage.CrChannel[co + (x / scale)];
TColor packed = default(TColor);
- this.PackYcbCr(ref packed, yy, cb, cr);
+ PackYcbCr(ref packed, yy, cb, cr);
pixels[x, y] = packed;
}
});
@@ -1497,8 +1499,8 @@ namespace ImageSharp.Formats
this.ReadFull(this.temp, 0, remaining);
byte scanComponentCount = this.temp[0];
- int scanComponentCountBy2 = 2 * scanComponentCount;
- if (remaining != 4 + scanComponentCountBy2)
+ int scanComponentCountX2 = 2 * scanComponentCount;
+ if (remaining != 4 + scanComponentCountX2)
{
throw new ImageFormatException("SOS length inconsistent with number of components");
}
@@ -1539,10 +1541,10 @@ namespace ImageSharp.Formats
if (this.isProgressive)
{
- zigStart = this.temp[1 + scanComponentCountBy2];
- zigEnd = this.temp[2 + scanComponentCountBy2];
- ah = this.temp[3 + scanComponentCountBy2] >> 4;
- al = this.temp[3 + scanComponentCountBy2] & 0x0f;
+ zigStart = this.temp[1 + scanComponentCountX2];
+ zigEnd = this.temp[2 + scanComponentCountX2];
+ ah = this.temp[3 + scanComponentCountX2] >> 4;
+ al = this.temp[3 + scanComponentCountX2] & 0x0f;
if ((zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || Block.BlockSize <= zigEnd)
{
@@ -1655,11 +1657,7 @@ namespace ImageSharp.Formats
var qtIndex = this.componentArray[compIndex].Selector;
- // Load the previous partially decoded coefficients, if applicable.
-
- //b = this.isProgressive ? this.progCoeffs[compIndex][blockIndex] : new Block();
-
- if (this.isProgressive)
+ if (this.isProgressive) // Load the previous partially decoded coefficients, if applicable.
{
blockIndex = ((@by * mxx) * hi) + bx;
ProcessBlockImpl(ah,
@@ -1796,9 +1794,7 @@ namespace ImageSharp.Formats
{
// We haven't completely decoded this 8x8 block. Save the coefficients.
- // TODO: This should be broken when isProgressive == true
-
- this.progCoeffs[compIndex][((@by*mxx)*hi) + bx] = b;
+ this.progCoeffs[compIndex][((@by*mxx)*hi) + bx] = b.Clone();
// At this point, we could execute the rest of the loop body to dequantize and
// perform the inverse DCT, to save early stages of a progressive image to the
@@ -1815,7 +1811,9 @@ namespace ImageSharp.Formats
b[Unzig[zig]] *= qt[zig];
}
- IDCT.Transform(ref b);
+ //IDCT.Transform(ref b);
+ //FloatIDCT.Transform(ref b);
+ MagicDCT.IDCT(ref b);
byte[] dst;
int offset;
@@ -2168,7 +2166,8 @@ namespace ImageSharp.Formats
/// The y luminance component.
/// The cb chroma component.
/// The cr chroma component.
- private void PackYcbCr(ref TColor packed, byte y, byte cb, byte cr)
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void PackYcbCr(ref TColor packed, byte y, byte cb, byte cr)
where TColor : struct, IPackedPixel
where TPacked : struct
{
@@ -2273,6 +2272,11 @@ namespace ImageSharp.Formats
Block.DisposeAll(blocks);
}
}
+
+ for (int i = 0; i < huffmanTrees.Length; i++)
+ {
+ huffmanTrees[i].Dispose();
+ }
}
}
}
diff --git a/src/ImageSharp46/ImageSharp46.csproj b/src/ImageSharp46/ImageSharp46.csproj
index e88dbb0e2..fdb1b7293 100644
--- a/src/ImageSharp46/ImageSharp46.csproj
+++ b/src/ImageSharp46/ImageSharp46.csproj
@@ -230,9 +230,11 @@
+
+
diff --git a/tests/ImageSharp.Tests46/DctSandbox.cs b/tests/ImageSharp.Tests46/DctSandbox.cs
new file mode 100644
index 000000000..d3b1bacb2
--- /dev/null
+++ b/tests/ImageSharp.Tests46/DctSandbox.cs
@@ -0,0 +1,97 @@
+using System.Numerics;
+using System.Text;
+using ImageSharp.Formats;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace ImageSharp.Tests
+{
+ public class DctSandbox
+ {
+     private ITestOutputHelper Output { get; }
+
+     public DctSandbox(ITestOutputHelper output)
+     {
+         Output = output;
+     }
+
+     // Row-major 8x8 ramp whose value encodes its position: element (row, col) is row*10 + col.
+     private float[] CreateTestData()
+     {
+         float[] result = new float[64];
+         for (int n = 0; n < 64; n++)
+         {
+             result[n] = (n / 8)*10 + (n % 8);
+         }
+         return result;
+     }
+
+     // Writes the 64 values to the test output as an 8x8 grid.
+     private void Print(float[] data)
+     {
+         StringBuilder bld = new StringBuilder();
+         for (int i = 0; i < 8; i++)
+         {
+             for (int j = 0; j < 8; j++)
+             {
+                 bld.Append($"{data[i * 8 + j],3} ");
+             }
+             bld.AppendLine();
+         }
+         Output.WriteLine(bld.ToString());
+     }
+
+     [Fact]
+     public void Mennyi()
+     {
+         Output.WriteLine(Vector.IsHardwareAccelerated.ToString());
+         Output.WriteLine(Vector.Count.ToString());
+     }
+
+     [Fact]
+     public void CheckTestData()
+     {
+         var data = CreateTestData();
+         Print(data);
+         Assert.Equal(77f, data[63]);
+     }
+
+     [Fact]
+     public void Load_Store()
+     {
+         var data = CreateTestData();
+         var m = MagicDCT.Load(data, 1, 1);
+         m = Matrix4x4.Transpose(m);
+         MagicDCT.Store(m, data, 4, 4);
+         Print(data);
+         // The tile read at (1, 1) lands transposed at (4, 4): the 21 from row 2, column 1 ends up at row 4, column 5.
+         Assert.Equal(11f, data[4*8 + 4]);
+         Assert.Equal(21f, data[4*8 + 5]);
+     }
+
+     [Fact]
+     public void Transpose8x8()
+     {
+         var data = CreateTestData();
+         Span result = new Span(64);
+         MagicDCT.Transpose8x8(data, result);
+         Print(result.Data);
+         for (int n = 0; n < 64; n++)
+         {
+             Assert.Equal(data[n], result[(n % 8)*8 + (n / 8)]);
+         }
+     }
+
+     [Fact]
+     public void Transpose8x8_Inplace()
+     {
+         var data = CreateTestData();
+         MagicDCT.Transpose8x8(data);
+         Print(data);
+         for (int n = 0; n < 64; n++)
+         {
+             Assert.Equal((float)((n % 8)*10 + (n / 8)), data[n]);
+         }
+     }
+ }
+}
\ No newline at end of file
diff --git a/tests/ImageSharp.Tests46/Formats/Bmp/BitmapTests.cs b/tests/ImageSharp.Tests46/Formats/Bmp/BitmapTests.cs
index 549ac05ef..c91b0ad1b 100644
--- a/tests/ImageSharp.Tests46/Formats/Bmp/BitmapTests.cs
+++ b/tests/ImageSharp.Tests46/Formats/Bmp/BitmapTests.cs
@@ -3,6 +3,8 @@
// Licensed under the Apache License, Version 2.0.
//
+using ImageSharp.Formats;
+
namespace ImageSharp.Tests
{
using System.IO;
diff --git a/tests/ImageSharp.Tests46/Formats/Jpg/JpegTests.cs b/tests/ImageSharp.Tests46/Formats/Jpg/JpegTests.cs
new file mode 100644
index 000000000..57bce1504
--- /dev/null
+++ b/tests/ImageSharp.Tests46/Formats/Jpg/JpegTests.cs
@@ -0,0 +1,85 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using ImageSharp.Formats;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace ImageSharp.Tests.Formats.Jpg
+{
+ public class JpegTests
+ {
+
+ public const string TestOutputDirectory = "TestOutput/Jpeg";
+
+ private ITestOutputHelper Output { get; }
+
+ public JpegTests(ITestOutputHelper output)
+ {
+     this.Output = output; // kept so CreateOutputStream can log the files it opens
+ }
+
+ protected string CreateTestOutputFile(string fileName)
+ {
+     // Make sure the output folder is there before a path inside it is handed out.
+     bool folderMissing = !Directory.Exists(TestOutputDirectory);
+     if (folderMissing)
+     {
+         Directory.CreateDirectory(TestOutputDirectory);
+     }
+
+     // Drop any directory part of the argument; only the bare name and its extension are kept.
+     string extension = Path.GetExtension(fileName);
+     string baseName = Path.GetFileNameWithoutExtension(fileName);
+     return TestOutputDirectory + "/" + baseName + extension;
+ }
+
+ protected Stream CreateOutputStream(string fileName) // opens (and empties) the named file inside the test output folder
+ {
+     fileName = CreateTestOutputFile(fileName);
+     Output?.WriteLine("Opened for write: "+fileName);
+     return File.Create(fileName); // truncates an existing file; File.OpenWrite would leave stale trailing bytes from a longer earlier run
+ }
+
+ public static IEnumerable