Browse Source

JpegDecoderCore SIMD optimizations applied to the .NET Core project

af/merge-core
Anton Firszov 9 years ago
parent
commit
d84625b8d3
  1. 113
      src/ImageSharp/Formats/Jpg/Components/Block.cs
  2. 55
      src/ImageSharp/Formats/Jpg/Components/Block8x8F.Generated.cs
  3. 93
      src/ImageSharp/Formats/Jpg/Components/Block8x8F.Generated.tt
  4. 617
      src/ImageSharp/Formats/Jpg/Components/Block8x8F.cs
  5. 95
      src/ImageSharp/Formats/Jpg/Components/MutableSpan.cs
  6. 265
      src/ImageSharp/Formats/Jpg/JpegDecoderCore.cs
  7. 511
      tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs
  8. 54
      tests/ImageSharp.Tests/Formats/Jpg/DctTests.cs
  9. 364
      tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs
  10. 95
      tests/ImageSharp.Tests/Formats/Jpg/UtilityTestClassBase.cs

113
src/ImageSharp/Formats/Jpg/Components/Block.cs

@ -12,9 +12,9 @@ namespace ImageSharp.Formats
/// <summary>
/// Represents an 8x8 block of coefficients to transform and encode.
/// </summary>
public struct Block : IDisposable
internal struct Block : IDisposable
{
private static ArrayPool<int> IntArrayPool = ArrayPool<int>.Create(BlockSize, 50);
private static readonly ArrayPool<int> ArrayPool = ArrayPool<int>.Create(BlockSize, 50);
/// <summary>
/// Gets the size of the block.
@ -37,7 +37,7 @@ namespace ImageSharp.Formats
public void Init()
{
//this.Data = new int[BlockSize];
this.Data = IntArrayPool.Rent(BlockSize);
this.Data = ArrayPool.Rent(BlockSize);
}
public static Block Create()
@ -79,7 +79,7 @@ namespace ImageSharp.Formats
{
if (Data != null)
{
IntArrayPool.Return(Data, true);
ArrayPool.Return(Data, true);
Data = null;
}
}
@ -108,4 +108,109 @@ namespace ImageSharp.Formats
return clone;
}
}
/// <summary>
/// Temporary float-based counterpart of <see cref="Block"/>, kept only to make refactoring easier:
/// 1. Refactor Block -> BlockF
/// 2. Test
/// 3. Refactor BlockF -> Block8x8F
/// </summary>
/// <remarks>
/// This is a mutable struct wrapping a pooled array. Copying a <see cref="BlockF"/> copies only the
/// array reference, so all copies share the same <see cref="Data"/>; dispose exactly one of them,
/// otherwise the same array is returned to the pool more than once.
/// </remarks>
internal struct BlockF : IDisposable
{
    /// <summary>
    /// Gets the size of the block.
    /// </summary>
    public const int BlockSize = 64;

    /// <summary>
    /// Pool the backing arrays are rented from and returned to.
    /// </summary>
    private static readonly ArrayPool<float> ArrayPool = ArrayPool<float>.Create(BlockSize, 50);

    /// <summary>
    /// The array of block data. It is null until <see cref="Init"/> has been called and after
    /// <see cref="Dispose"/>. The pool may hand out an array longer than <see cref="BlockSize"/>.
    /// </summary>
    public float[] Data;

    /// <summary>
    /// Gets a value indicating whether a backing array is currently attached.
    /// </summary>
    public bool IsInitialized => this.Data != null;

    /// <summary>
    /// Gets or sets the value at the given block index.
    /// </summary>
    /// <param name="index">The index of the value inside <see cref="Data"/>.</param>
    /// <returns>
    /// The <see cref="float"/> stored at <paramref name="index"/>.
    /// </returns>
    public float this[int index]
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get { return this.Data[index]; }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        set { this.Data[index] = value; }
    }

    /// <summary>
    /// Creates a block whose backing array has already been rented.
    /// </summary>
    /// <returns>The initialized block.</returns>
    public static BlockF Create()
    {
        var block = new BlockF();
        block.Init();
        return block;
    }

    /// <summary>
    /// Creates an array of blocks and rents a backing array for each element.
    /// </summary>
    /// <param name="size">The number of blocks to create.</param>
    /// <returns>The array of initialized blocks.</returns>
    public static BlockF[] CreateArray(int size)
    {
        BlockF[] result = new BlockF[size];
        for (int i = 0; i < result.Length; i++)
        {
            // Called on the array element itself so the rented array is stored in place.
            result[i].Init();
        }

        return result;
    }

    /// <summary>
    /// Disposes every block of the given array in place.
    /// </summary>
    /// <param name="blocks">The blocks to dispose.</param>
    public static void DisposeAll(BlockF[] blocks)
    {
        for (int i = 0; i < blocks.Length; i++)
        {
            blocks[i].Dispose();
        }
    }

    /// <summary>
    /// Rents the backing array from the pool. Structs cannot declare a parameterless constructor,
    /// so this has to be called explicitly (or use <see cref="Create"/>).
    /// </summary>
    public void Init()
    {
        this.Data = ArrayPool.Rent(BlockSize);
    }

    /// <summary>
    /// Returns the backing array to the pool (cleared) and detaches it. Calling this again on the
    /// same instance is a no-op.
    /// </summary>
    // TODO: Refactor Block.Dispose() callers to always use 'using' or 'finally' statement!
    public void Dispose()
    {
        if (this.Data != null)
        {
            ArrayPool.Return(this.Data, true);
            this.Data = null;
        }
    }

    /// <summary>
    /// Sets every element of the backing array to zero.
    /// </summary>
    public void Clear()
    {
        Array.Clear(this.Data, 0, this.Data.Length);
    }

    /// <summary>
    /// Creates a new block with its own rented array holding a copy of the first
    /// <see cref="BlockSize"/> values of this block.
    /// </summary>
    /// <returns>The copy. The caller is responsible for disposing it.</returns>
    public BlockF Clone()
    {
        BlockF clone = Create();
        Array.Copy(this.Data, clone.Data, BlockSize);
        return clone;
    }
}
}

55
src/ImageSharp/Formats/Jpg/Components/Block8x8F.Generated.cs

@ -0,0 +1,55 @@

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
namespace ImageSharp.Formats
{
// NOTE(review): this part appears to be the output of Block8x8F.Generated.tt; lasting changes
// presumably belong in the template — confirm before editing by hand.
internal partial struct Block8x8F
{
/// <summary>
/// Writes the transpose of this block into <paramref name="d"/>: the element at (row i, column j)
/// of this block becomes the element at (row j, column i) of <paramref name="d"/>.
/// </summary>
/// <param name="d">
/// The destination block. It must not be the same block as this one, because destination fields
/// are written while source fields are still being read.
/// </param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void TransposeInto(ref Block8x8F d)
{
d.V0L.X = V0L.X; d.V1L.X = V0L.Y; d.V2L.X = V0L.Z; d.V3L.X = V0L.W; d.V4L.X = V0R.X; d.V5L.X = V0R.Y; d.V6L.X = V0R.Z; d.V7L.X = V0R.W;
d.V0L.Y = V1L.X; d.V1L.Y = V1L.Y; d.V2L.Y = V1L.Z; d.V3L.Y = V1L.W; d.V4L.Y = V1R.X; d.V5L.Y = V1R.Y; d.V6L.Y = V1R.Z; d.V7L.Y = V1R.W;
d.V0L.Z = V2L.X; d.V1L.Z = V2L.Y; d.V2L.Z = V2L.Z; d.V3L.Z = V2L.W; d.V4L.Z = V2R.X; d.V5L.Z = V2R.Y; d.V6L.Z = V2R.Z; d.V7L.Z = V2R.W;
d.V0L.W = V3L.X; d.V1L.W = V3L.Y; d.V2L.W = V3L.Z; d.V3L.W = V3L.W; d.V4L.W = V3R.X; d.V5L.W = V3R.Y; d.V6L.W = V3R.Z; d.V7L.W = V3R.W;
d.V0R.X = V4L.X; d.V1R.X = V4L.Y; d.V2R.X = V4L.Z; d.V3R.X = V4L.W; d.V4R.X = V4R.X; d.V5R.X = V4R.Y; d.V6R.X = V4R.Z; d.V7R.X = V4R.W;
d.V0R.Y = V5L.X; d.V1R.Y = V5L.Y; d.V2R.Y = V5L.Z; d.V3R.Y = V5L.W; d.V4R.Y = V5R.X; d.V5R.Y = V5R.Y; d.V6R.Y = V5R.Z; d.V7R.Y = V5R.W;
d.V0R.Z = V6L.X; d.V1R.Z = V6L.Y; d.V2R.Z = V6L.Z; d.V3R.Z = V6L.W; d.V4R.Z = V6R.X; d.V5R.Z = V6R.Y; d.V6R.Z = V6R.Z; d.V7R.Z = V6R.W;
d.V0R.W = V7L.X; d.V1R.W = V7L.Y; d.V2R.W = V7L.Z; d.V3R.W = V7L.W; d.V4R.W = V7R.X; d.V5R.W = V7R.Y; d.V6R.W = V7R.Z; d.V7R.W = V7R.W;
}
/// <summary>
/// Writes a copy of this block into <paramref name="d"/> in which every element has been limited
/// first to at most <paramref name="max"/> and then to at least <paramref name="min"/>.
/// </summary>
/// <param name="min">The lower bound applied to every element.</param>
/// <param name="max">The upper bound applied to every element.</param>
/// <param name="d">The destination block.</param>
public void CropInto(float min, float max, ref Block8x8F d)
{
Vector4 minVec = new Vector4(min);
Vector4 maxVec = new Vector4(max);
d.V0L = Vector4.Max(Vector4.Min(V0L, maxVec), minVec);d.V0R = Vector4.Max(Vector4.Min(V0R, maxVec), minVec);
d.V1L = Vector4.Max(Vector4.Min(V1L, maxVec), minVec);d.V1R = Vector4.Max(Vector4.Min(V1R, maxVec), minVec);
d.V2L = Vector4.Max(Vector4.Min(V2L, maxVec), minVec);d.V2R = Vector4.Max(Vector4.Min(V2R, maxVec), minVec);
d.V3L = Vector4.Max(Vector4.Min(V3L, maxVec), minVec);d.V3R = Vector4.Max(Vector4.Min(V3R, maxVec), minVec);
d.V4L = Vector4.Max(Vector4.Min(V4L, maxVec), minVec);d.V4R = Vector4.Max(Vector4.Min(V4R, maxVec), minVec);
d.V5L = Vector4.Max(Vector4.Min(V5L, maxVec), minVec);d.V5R = Vector4.Max(Vector4.Min(V5R, maxVec), minVec);
d.V6L = Vector4.Max(Vector4.Min(V6L, maxVec), minVec);d.V6R = Vector4.Max(Vector4.Min(V6R, maxVec), minVec);
d.V7L = Vector4.Max(Vector4.Min(V7L, maxVec), minVec);d.V7R = Vector4.Max(Vector4.Min(V7R, maxVec), minVec);
}
/// <summary>
/// Writes a copy of this block into <paramref name="d"/> in which every element has been limited
/// to the range given by CMin4 and CMax4 and then offset by COff4.
/// </summary>
/// <param name="d">The destination block.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void ColorifyInto(ref Block8x8F d)
{
d.V0L = Vector4.Max(Vector4.Min(V0L, CMax4), CMin4) + COff4;d.V0R = Vector4.Max(Vector4.Min(V0R, CMax4), CMin4) + COff4;
d.V1L = Vector4.Max(Vector4.Min(V1L, CMax4), CMin4) + COff4;d.V1R = Vector4.Max(Vector4.Min(V1R, CMax4), CMin4) + COff4;
d.V2L = Vector4.Max(Vector4.Min(V2L, CMax4), CMin4) + COff4;d.V2R = Vector4.Max(Vector4.Min(V2R, CMax4), CMin4) + COff4;
d.V3L = Vector4.Max(Vector4.Min(V3L, CMax4), CMin4) + COff4;d.V3R = Vector4.Max(Vector4.Min(V3R, CMax4), CMin4) + COff4;
d.V4L = Vector4.Max(Vector4.Min(V4L, CMax4), CMin4) + COff4;d.V4R = Vector4.Max(Vector4.Min(V4R, CMax4), CMin4) + COff4;
d.V5L = Vector4.Max(Vector4.Min(V5L, CMax4), CMin4) + COff4;d.V5R = Vector4.Max(Vector4.Min(V5R, CMax4), CMin4) + COff4;
d.V6L = Vector4.Max(Vector4.Min(V6L, CMax4), CMin4) + COff4;d.V6R = Vector4.Max(Vector4.Min(V6R, CMax4), CMin4) + COff4;
d.V7L = Vector4.Max(Vector4.Min(V7L, CMax4), CMin4) + COff4;d.V7R = Vector4.Max(Vector4.Min(V7R, CMax4), CMin4) + COff4;
}
}
}

93
src/ImageSharp/Formats/Jpg/Components/Block8x8F.Generated.tt

@ -0,0 +1,93 @@
<#@ template debug="false" hostspecific="false" language="C#" #>
<#@ assembly name="System.Core" #>
<#@ import namespace="System.Linq" #>
<#@ import namespace="System.Text" #>
<#@ import namespace="System.Collections.Generic" #>
<#@ output extension=".cs" #>
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
<#
// Vector4 component names, indexed by the column position inside a 4-wide half row.
char[] coordz = new[] {'X', 'Y', 'Z', 'W'};
#>
namespace ImageSharp.Formats
{
internal partial struct Block8x8F
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void TransposeInto(ref Block8x8F d)
{
<#
// Emits one assignment per scalar, one output line per source row i:
// source element (row i, column j) is stored into destination element (row j, column i).
// Columns 0-3 live in the 'L' vector of a row, columns 4-7 in the 'R' vector.
PushIndent(" ");
for (int i = 0; i < 8; i++)
{
// The source row index i becomes the destination column, which selects the destination half and component.
char destCoord = coordz[i % 4];
char destSide = (i / 4) % 2 == 0 ? 'L' : 'R';
for (int j = 0; j < 8; j++)
{
// The source column index j selects the source half and component, and becomes the destination row.
char srcCoord = coordz[j % 4];
char srcSide = (j / 4) % 2 == 0 ? 'L' : 'R';
string expression = $"d.V{j}{destSide}.{destCoord} = V{i}{srcSide}.{srcCoord}; ";
Write(expression);
}
// End the output line for source row i.
WriteLine("");
}
PopIndent();
#>
}
public void CropInto(float min, float max, ref Block8x8F d)
{
Vector4 minVec = new Vector4(min);
Vector4 maxVec = new Vector4(max);
<#
// Emits one clamp statement per Vector4 field; the 'L' and 'R' halves of a row share an output line.
PushIndent(" ");
for (int i = 0; i < 8; i++)
{
for (int j = 0; j < 2; j++)
{
char side = j == 0 ? 'L' : 'R';
Write($"d.V{i}{side} = Vector4.Max(Vector4.Min(V{i}{side}, maxVec), minVec);");
}
WriteLine("");
}
PopIndent();
#>
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void ColorifyInto(ref Block8x8F d)
{
<#
// Same shape as CropInto, but clamps with the CMin4/CMax4 fields and then adds COff4.
PushIndent(" ");
for (int i = 0; i < 8; i++)
{
for (int j = 0; j < 2; j++)
{
char side = j == 0 ? 'L' : 'R';
Write($"d.V{i}{side} = Vector4.Max(Vector4.Min(V{i}{side}, CMax4), CMin4) + COff4;");
}
WriteLine("");
}
PopIndent();
#>
}
}
}

617
src/ImageSharp/Formats/Jpg/Components/Block8x8F.cs

@ -0,0 +1,617 @@
using System;
using System.Buffers;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// ReSharper disable InconsistentNaming
namespace ImageSharp.Formats
{
/// <summary>
/// DCT code Ported from https://github.com/norishigefukushima/dct_simd
/// </summary>
internal partial struct Block8x8F
{
// The 8x8 block is stored as sixteen Vector4 fields named V{row}{half}:
// {row} is the row index 0-7, 'L' holds columns 0-3 of that row and 'R' holds columns 4-7.
// The pointer-based members of this struct treat these fields as 64 consecutive floats in
// declaration order, so the order of the declarations below must not be changed.
public Vector4 V0L;
public Vector4 V0R;
public Vector4 V1L;
public Vector4 V1R;
public Vector4 V2L;
public Vector4 V2R;
public Vector4 V3L;
public Vector4 V3R;
public Vector4 V4L;
public Vector4 V4R;
public Vector4 V5L;
public Vector4 V5R;
public Vector4 V6L;
public Vector4 V6R;
public Vector4 V7L;
public Vector4 V7R;
/// <summary>
/// The number of <see cref="Vector4"/> fields in a block.
/// </summary>
public const int VectorCount = 16;
/// <summary>
/// The number of scalar (float) values in a block.
/// </summary>
public const int ScalarCount = VectorCount*4;
// NOTE(review): no code visible in this file or in the generated part references this pool —
// confirm whether it is still needed.
private static readonly ArrayPool<float> ScalarArrayPool = ArrayPool<float>.Create(ScalarCount, 50);
/// <summary>
/// Fills this block with <see cref="ScalarCount"/> floats read from <paramref name="source"/>,
/// starting at the span's current offset.
/// </summary>
/// <param name="source">The span to read the values from.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public unsafe void LoadFrom(MutableSpan<float> source)
{
    // The copy starts at the first vector field and relies on all sixteen vector fields
    // occupying consecutive memory.
    fixed (Vector4* firstVector = &this.V0L)
    {
        IntPtr blockAddress = (IntPtr)firstVector;
        Marshal.Copy(source.Data, source.Offset, blockAddress, ScalarCount);
    }
}
/// <summary>
/// Writes the <see cref="ScalarCount"/> floats of this block into <paramref name="dest"/>,
/// starting at the span's current offset.
/// </summary>
/// <param name="dest">The span to write the values to.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public unsafe void CopyTo(MutableSpan<float> dest)
{
    // The copy starts at the first vector field and relies on all sixteen vector fields
    // occupying consecutive memory.
    fixed (Vector4* firstVector = &this.V0L)
    {
        IntPtr blockAddress = (IntPtr)firstVector;
        Marshal.Copy(blockAddress, dest.Data, dest.Offset, ScalarCount);
    }
}
/// <summary>
/// Writes the <see cref="ScalarCount"/> floats of this block to the beginning of
/// <paramref name="dest"/>.
/// </summary>
/// <param name="dest">The array to write the values to, starting at index 0.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public unsafe void CopyTo(float[] dest)
{
    // The copy starts at the first vector field and relies on all sixteen vector fields
    // occupying consecutive memory.
    fixed (Vector4* firstVector = &this.V0L)
    {
        IntPtr blockAddress = (IntPtr)firstVector;
        Marshal.Copy(blockAddress, dest, 0, ScalarCount);
    }
}
/// <summary>
/// Fills the block behind <paramref name="blockPtr"/> with <see cref="ScalarCount"/> floats read
/// from <paramref name="source"/>, starting at the span's current offset.
/// </summary>
/// <param name="blockPtr">Pointer to the block to fill. The pointer is used as given; nothing is pinned here.</param>
/// <param name="source">The span to read the values from.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe void LoadFrom(Block8x8F* blockPtr, MutableSpan<float> source)
{
    IntPtr blockAddress = (IntPtr)blockPtr;
    Marshal.Copy(source.Data, source.Offset, blockAddress, ScalarCount);
}
/// <summary>
/// Writes the <see cref="ScalarCount"/> floats of the block behind <paramref name="blockPtr"/>
/// into <paramref name="dest"/>, starting at the span's current offset.
/// </summary>
/// <param name="blockPtr">Pointer to the block to read. The pointer is used as given; nothing is pinned here.</param>
/// <param name="dest">The span to write the values to.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe void CopyTo(Block8x8F* blockPtr, MutableSpan<float> dest)
{
    IntPtr blockAddress = (IntPtr)blockPtr;
    Marshal.Copy(blockAddress, dest.Data, dest.Offset, ScalarCount);
}
/// <summary>
/// Fills this block from the first <see cref="ScalarCount"/> integers of
/// <paramref name="source"/>, converting each one to float.
/// </summary>
/// <param name="source">The span to read the values from.</param>
internal unsafe void LoadFrom(MutableSpan<int> source)
{
    fixed (Vector4* firstVector = &this.V0L)
    {
        // View the vector fields as one flat run of floats.
        float* scalars = (float*)firstVector;
        int index = 0;
        while (index < ScalarCount)
        {
            scalars[index] = source[index];
            index++;
        }
    }
}
/// <summary>
/// Writes the <see cref="ScalarCount"/> values of this block into <paramref name="dest"/>,
/// converting each float to int with a plain cast.
/// </summary>
/// <param name="dest">The span to write the values to.</param>
internal unsafe void CopyTo(MutableSpan<int> dest)
{
    fixed (Vector4* firstVector = &this.V0L)
    {
        // View the vector fields as one flat run of floats.
        float* scalars = (float*)firstVector;
        int index = 0;
        while (index < ScalarCount)
        {
            dest[index] = (int)scalars[index];
            index++;
        }
    }
}
/// <summary>
/// Transposes this block in place by swapping every element below the main diagonal with its
/// mirror element above the diagonal.
/// </summary>
public unsafe void TransposeInplace()
{
    fixed (Vector4* firstVector = &this.V0L)
    {
        float* cells = (float*)firstVector;
        for (int row = 1; row < 8; row++)
        {
            for (int col = 0; col < row; col++)
            {
                int below = (row * 8) + col;
                int above = (col * 8) + row;

                float held = cells[below];
                cells[below] = cells[above];
                cells[above] = held;
            }
        }
    }
}
/// <summary>
/// Scalar transpose that pins both blocks and copies element by element.
/// Reference implementation we can benchmark against.
/// </summary>
/// <param name="destination">The block receiving the transposed values.</param>
internal unsafe void TransposeInto_PinningImpl(ref Block8x8F destination)
{
    fixed (Vector4* sourceVectors = &this.V0L, destinationVectors = &destination.V0L)
    {
        float* from = (float*)sourceVectors;
        float* to = (float*)destinationVectors;

        // Walk the source in storage order; element n sits at (n / 8, n % 8) and is written
        // to the mirrored position of the destination.
        for (int n = 0; n < 64; n++)
        {
            int row = n / 8;
            int col = n % 8;
            to[(col * 8) + row] = from[n];
        }
    }
}
/// <summary>
/// Writes the transpose of the block behind <paramref name="sourcePtr"/> into the block behind
/// <paramref name="destPtr"/>, one scalar at a time.
/// </summary>
/// <param name="sourcePtr">Pointer to the block to read.</param>
/// <param name="destPtr">Pointer to the block to write.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe void TransposeInto(Block8x8F* sourcePtr, Block8x8F* destPtr)
{
    float* read = (float*)sourcePtr;
    float* write = (float*)destPtr;

    // The source is consumed in storage order while the destination index is mirrored.
    for (int row = 0; row < 8; row++)
    {
        for (int col = 0; col < 8; col++)
        {
            write[(col * 8) + row] = *read;
            read++;
        }
    }
}
/// <summary>
/// Multiplies every vector of this block component-wise by <paramref name="s"/>.
/// </summary>
/// <param name="s">The factor applied to each of the sixteen vectors.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void MultiplyAllInplace(Vector4 s)
{
    this.V0L = this.V0L * s;
    this.V0R = this.V0R * s;

    this.V1L = this.V1L * s;
    this.V1R = this.V1R * s;

    this.V2L = this.V2L * s;
    this.V2R = this.V2R * s;

    this.V3L = this.V3L * s;
    this.V3R = this.V3R * s;

    this.V4L = this.V4L * s;
    this.V4R = this.V4R * s;

    this.V5L = this.V5L * s;
    this.V5R = this.V5R * s;

    this.V6L = this.V6L * s;
    this.V6R = this.V6R * s;

    this.V7L = this.V7L * s;
    this.V7R = this.V7R * s;
}
// ReSharper disable once InconsistentNaming
/// <summary>
/// Computes the inverse DCT of this block and stores the result in <paramref name="dest"/>.
/// The work is done in two passes; each pass transposes its input into <paramref name="temp"/>
/// and then runs the left and right 8x4 transforms from <paramref name="temp"/> into
/// <paramref name="dest"/>. The final result is scaled by 0.125.
/// </summary>
/// <param name="dest">Receives the result; it also holds the intermediate result of the first pass.</param>
/// <param name="temp">Scratch block; its previous content is overwritten.</param>
public void IDCTInto(ref Block8x8F dest, ref Block8x8F temp)
{
// First pass: transposed input -> dest.
TransposeInto(ref temp);
temp.iDCT2D8x4_LeftPart(ref dest);
temp.iDCT2D8x4_RightPart(ref dest);
// Second pass: transposed first-pass result -> dest.
dest.TransposeInto(ref temp);
temp.iDCT2D8x4_LeftPart(ref dest);
temp.iDCT2D8x4_RightPart(ref dest);
// Single scaling step applied after both passes.
dest.MultiplyAllInplace(_0_125);
}
/// <summary>
/// Replaces the content of this block with its inverse DCT, using two local blocks for the
/// result and for scratch space.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void IDCTInplace()
{
    Block8x8F output = default(Block8x8F);
    Block8x8F scratch = default(Block8x8F);

    this.IDCTInto(ref output, ref scratch);
    this = output;
}
// Multipliers used by the inverse DCT passes, each broadcast to all four lanes of a Vector4.
// NOTE: a field name encodes only the magnitude of its value. _1_961571, _0_390181, _0_899976,
// _2_562915 and _1_847759 hold NEGATIVE values, so they are added (not subtracted) where they are used.
private static readonly Vector4 _1_175876 = new Vector4(1.175876f);
private static readonly Vector4 _1_961571 = new Vector4(-1.961571f);
private static readonly Vector4 _0_390181 = new Vector4(-0.390181f);
private static readonly Vector4 _0_899976 = new Vector4(-0.899976f);
private static readonly Vector4 _2_562915 = new Vector4(-2.562915f);
private static readonly Vector4 _0_298631 = new Vector4(0.298631f);
private static readonly Vector4 _2_053120 = new Vector4(2.053120f);
private static readonly Vector4 _3_072711 = new Vector4(3.072711f);
private static readonly Vector4 _1_501321 = new Vector4(1.501321f);
private static readonly Vector4 _0_541196 = new Vector4(0.541196f);
private static readonly Vector4 _1_847759 = new Vector4(-1.847759f);
private static readonly Vector4 _0_765367 = new Vector4(0.765367f);
// Scale factor applied once at the end of IDCTInto.
private static readonly Vector4 _0_125 = new Vector4(0.1250f);
/// <summary>
/// Runs one inverse DCT pass over the left vectors (V0L..V7L) of this block, processing four
/// lanes at a time, and stores the result in the left vectors of <paramref name="d"/>.
/// </summary>
/// <remarks>
/// The block comments show the scalar C form of each step, where y[k] corresponds to V{k}L of
/// this block and x[k] to V{k}L of <paramref name="d"/>. The per-element scaling by 0.353554 at
/// the end of the scalar form is not performed here; IDCTInto applies a single multiplication
/// by 0.125 after both passes instead.
/// </remarks>
/// <param name="d">The block whose left vectors receive the result.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void iDCT2D8x4_LeftPart(ref Block8x8F d)
{
/*
float a0,a1,a2,a3,b0,b1,b2,b3; float z0,z1,z2,z3,z4; float r[8]; int i;
for(i = 0;i < 8;i++){ r[i] = (float)(cos((double)i / 16.0 * M_PI) * M_SQRT2); }
*/
/*
Values of r[i] according to the formula above:
0: 1.414214
1: 1.387040
2: 1.306563
3: 1.175876
4: 1.000000
5: 0.785695
6: 0.541196
7: 0.275899
*/
// Odd part: combines y[1], y[3], y[5], y[7] into b0..b3.
Vector4 my1 = V1L;
Vector4 my7 = V7L;
Vector4 mz0 = my1 + my7;
Vector4 my3 = V3L;
Vector4 mz2 = my3 + my7;
Vector4 my5 = V5L;
Vector4 mz1 = my3 + my5;
Vector4 mz3 = my1 + my5;
Vector4 mz4 = ((mz0 + mz1)*_1_175876);
//z0 = y[1] + y[7]; z1 = y[3] + y[5]; z2 = y[3] + y[7]; z3 = y[1] + y[5];
//z4 = (z0 + z1) * r[3];
// The four constants below hold negative values (see their declarations).
mz2 = mz2*_1_961571 + mz4;
mz3 = mz3*_0_390181 + mz4;
mz0 = mz0*_0_899976;
mz1 = mz1*_2_562915;
/*
-0.899976
-2.562915
-1.961571
-0.390181
z0 = z0 * (-r[3] + r[7]);
z1 = z1 * (-r[3] - r[1]);
z2 = z2 * (-r[3] - r[5]) + z4;
z3 = z3 * (-r[3] + r[5]) + z4;*/
Vector4 mb3 = my7*_0_298631 + mz0 + mz2;
Vector4 mb2 = my5*_2_053120 + mz1 + mz3;
Vector4 mb1 = my3*_3_072711 + mz1 + mz2;
Vector4 mb0 = my1*_1_501321 + mz0 + mz3;
/*
0.298631
2.053120
3.072711
1.501321
b3 = y[7] * (-r[1] + r[3] + r[5] - r[7]) + z0 + z2;
b2 = y[5] * ( r[1] + r[3] - r[5] + r[7]) + z1 + z3;
b1 = y[3] * ( r[1] + r[3] + r[5] - r[7]) + z1 + z2;
b0 = y[1] * ( r[1] + r[3] - r[5] - r[7]) + z0 + z3;
*/
// Even part: combines y[0], y[2], y[4], y[6] into a0..a3.
// From here on my0..my3 are reused to hold a0..a3.
Vector4 my2 = V2L;
Vector4 my6 = V6L;
mz4 = (my2 + my6)*_0_541196;
Vector4 my0 = V0L;
Vector4 my4 = V4L;
mz0 = my0 + my4;
mz1 = my0 - my4;
// _1_847759 holds -1.847759, so this addition matches the subtraction in the scalar form below.
mz2 = mz4 + my6*_1_847759;
mz3 = mz4 + my2*_0_765367;
my0 = mz0 + mz3;
my3 = mz0 - mz3;
my1 = mz1 + mz2;
my2 = mz1 - mz2;
/*
1.847759
0.765367
z4 = (y[2] + y[6]) * r[6];
z0 = y[0] + y[4]; z1 = y[0] - y[4];
z2 = z4 - y[6] * (r[2] + r[6]);
z3 = z4 + y[2] * (r[2] - r[6]);
a0 = z0 + z3; a3 = z0 - z3;
a1 = z1 + z2; a2 = z1 - z2;
*/
// Final butterfly: x[k] = a[k] + b[k], x[7 - k] = a[k] - b[k].
d.V0L = my0 + mb0;
d.V7L = my0 - mb0;
d.V1L = my1 + mb1;
d.V6L = my1 - mb1;
d.V2L = my2 + mb2;
d.V5L = my2 - mb2;
d.V3L = my3 + mb3;
d.V4L = my3 - mb3;
/*
x[0] = a0 + b0; x[7] = a0 - b0;
x[1] = a1 + b1; x[6] = a1 - b1;
x[2] = a2 + b2; x[5] = a2 - b2;
x[3] = a3 + b3; x[4] = a3 - b3;
for(i = 0;i < 8;i++){ x[i] *= 0.353554f; }
*/
}
/// <summary>
/// Runs one inverse DCT pass over the right vectors (V0R..V7R) of this block, processing four
/// lanes at a time, and stores the result in the right vectors of <paramref name="d"/>.
/// </summary>
/// <remarks>
/// The block comments show the scalar C form of each step, where y[k] corresponds to V{k}R of
/// this block and x[k] to V{k}R of <paramref name="d"/>. The per-element scaling by 0.353554 at
/// the end of the scalar form is not performed here; IDCTInto applies a single multiplication
/// by 0.125 after both passes instead.
/// </remarks>
/// <param name="d">The block whose right vectors receive the result.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void iDCT2D8x4_RightPart(ref Block8x8F d)
{
/*
float a0,a1,a2,a3,b0,b1,b2,b3; float z0,z1,z2,z3,z4; float r[8]; int i;
for(i = 0;i < 8;i++){ r[i] = (float)(cos((double)i / 16.0 * M_PI) * M_SQRT2); }
*/
/*
Values of r[i] according to the formula above:
0: 1.414214
1: 1.387040
2: 1.306563
3: 1.175876
4: 1.000000
5: 0.785695
6: 0.541196
7: 0.275899
*/
// Odd part: combines y[1], y[3], y[5], y[7] into b0..b3.
Vector4 my1 = V1R;
Vector4 my7 = V7R;
Vector4 mz0 = my1 + my7;
Vector4 my3 = V3R;
Vector4 mz2 = my3 + my7;
Vector4 my5 = V5R;
Vector4 mz1 = my3 + my5;
Vector4 mz3 = my1 + my5;
Vector4 mz4 = ((mz0 + mz1)*_1_175876);
//z0 = y[1] + y[7]; z1 = y[3] + y[5]; z2 = y[3] + y[7]; z3 = y[1] + y[5];
//z4 = (z0 + z1) * r[3];
// The four constants below hold negative values (see their declarations).
mz2 = mz2*_1_961571 + mz4;
mz3 = mz3*_0_390181 + mz4;
mz0 = mz0*_0_899976;
mz1 = mz1*_2_562915;
/*
-0.899976
-2.562915
-1.961571
-0.390181
z0 = z0 * (-r[3] + r[7]);
z1 = z1 * (-r[3] - r[1]);
z2 = z2 * (-r[3] - r[5]) + z4;
z3 = z3 * (-r[3] + r[5]) + z4;*/
Vector4 mb3 = my7*_0_298631 + mz0 + mz2;
Vector4 mb2 = my5*_2_053120 + mz1 + mz3;
Vector4 mb1 = my3*_3_072711 + mz1 + mz2;
Vector4 mb0 = my1*_1_501321 + mz0 + mz3;
/*
0.298631
2.053120
3.072711
1.501321
b3 = y[7] * (-r[1] + r[3] + r[5] - r[7]) + z0 + z2;
b2 = y[5] * ( r[1] + r[3] - r[5] + r[7]) + z1 + z3;
b1 = y[3] * ( r[1] + r[3] + r[5] - r[7]) + z1 + z2;
b0 = y[1] * ( r[1] + r[3] - r[5] - r[7]) + z0 + z3;
*/
// Even part: combines y[0], y[2], y[4], y[6] into a0..a3.
// From here on my0..my3 are reused to hold a0..a3.
Vector4 my2 = V2R;
Vector4 my6 = V6R;
mz4 = (my2 + my6)*_0_541196;
Vector4 my0 = V0R;
Vector4 my4 = V4R;
mz0 = my0 + my4;
mz1 = my0 - my4;
// _1_847759 holds -1.847759, so this addition matches the subtraction in the scalar form below.
mz2 = mz4 + my6*_1_847759;
mz3 = mz4 + my2*_0_765367;
my0 = mz0 + mz3;
my3 = mz0 - mz3;
my1 = mz1 + mz2;
my2 = mz1 - mz2;
/*
1.847759
0.765367
z4 = (y[2] + y[6]) * r[6];
z0 = y[0] + y[4]; z1 = y[0] - y[4];
z2 = z4 - y[6] * (r[2] + r[6]);
z3 = z4 + y[2] * (r[2] - r[6]);
a0 = z0 + z3; a3 = z0 - z3;
a1 = z1 + z2; a2 = z1 - z2;
*/
// Final butterfly: x[k] = a[k] + b[k], x[7 - k] = a[k] - b[k].
d.V0R = my0 + mb0;
d.V7R = my0 - mb0;
d.V1R = my1 + mb1;
d.V6R = my1 - mb1;
d.V2R = my2 + mb2;
d.V5R = my2 - mb2;
d.V3R = my3 + mb3;
d.V4R = my3 - mb3;
/*
x[0] = a0 + b0; x[7] = a0 - b0;
x[1] = a1 + b1; x[6] = a1 - b1;
x[2] = a2 + b2; x[5] = a2 - b2;
x[3] = a3 + b3; x[4] = a3 - b3;
for(i = 0;i < 8;i++){ x[i] *= 0.353554f; }
*/
}
/// <summary>
/// Applies the inverse DCT to an integer <see cref="Block"/>: the data is loaded into a
/// temporary <see cref="Block8x8F"/>, transformed, and written back to the same array.
/// </summary>
/// <param name="block">The block whose data is transformed in place.</param>
internal static void SuchIDCT(ref Block block)
{
    Block8x8F scratch = default(Block8x8F);
    Block8x8F output = default(Block8x8F);
    Block8x8F input = default(Block8x8F);

    input.LoadFrom(block.Data);
    input.IDCTInto(ref output, ref scratch);
    output.CopyTo(block.Data);
}
/// <summary>
/// Applies the inverse DCT to a <see cref="BlockF"/>: the data is loaded into a temporary
/// <see cref="Block8x8F"/>, transformed, and written back to the same array.
/// </summary>
/// <param name="block">The block whose data is transformed in place.</param>
internal static void SuchIDCT(ref BlockF block)
{
    Block8x8F scratch = default(Block8x8F);
    Block8x8F output = default(Block8x8F);
    Block8x8F input = default(Block8x8F);

    input.LoadFrom(block.Data);
    input.IDCTInto(ref output, ref scratch);
    output.CopyTo(block.Data);
}
/// <summary>
/// Gets or sets the scalar at the given position, counting the floats of the block in storage
/// order (V0L.X is 0, V0L.Y is 1, and so on).
/// </summary>
/// <param name="idx">The scalar index, from 0 to <see cref="ScalarCount"/> - 1.</param>
/// <returns>The value stored at <paramref name="idx"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="idx"/> is negative or not less than <see cref="ScalarCount"/>.
/// </exception>
public unsafe float this[int idx]
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        // The access below is raw pointer arithmetic, so an unchecked index would read memory
        // outside the block. The unsigned comparison also rejects negative values.
        if ((uint)idx >= (uint)ScalarCount)
        {
            throw new ArgumentOutOfRangeException(nameof(idx));
        }

        fixed (Block8x8F* p = &this)
        {
            float* fp = (float*)p;
            return fp[idx];
        }
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    set
    {
        // See the getter: an unchecked index would overwrite memory outside the block.
        if ((uint)idx >= (uint)ScalarCount)
        {
            throw new ArgumentOutOfRangeException(nameof(idx));
        }

        fixed (Block8x8F* p = &this)
        {
            float* fp = (float*)p;
            fp[idx] = value;
        }
    }
}
/// <summary>
/// Reads the scalar at position <paramref name="idx"/> of the block behind
/// <paramref name="blockPtr"/>. The index is not range-checked.
/// </summary>
/// <param name="blockPtr">Pointer to the block to read.</param>
/// <param name="idx">The scalar index in storage order.</param>
/// <returns>The value at the given position.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static unsafe float GetScalarAt(Block8x8F* blockPtr, int idx)
{
    return *((float*)blockPtr + idx);
}
/// <summary>
/// Writes <paramref name="value"/> to position <paramref name="idx"/> of the block behind
/// <paramref name="blockPtr"/>. The index is not range-checked.
/// </summary>
/// <param name="blockPtr">Pointer to the block to modify.</param>
/// <param name="idx">The scalar index in storage order.</param>
/// <param name="value">The value to store.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static unsafe void SetScalarAt(Block8x8F* blockPtr, int idx, float value)
{
    *((float*)blockPtr + idx) = value;
}
/// <summary>
/// Resets every field of this block to zero.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void Clear()
{
    // Overwriting the whole struct with its default value zeroes all sixteen vectors at once.
    this = default(Block8x8F);
}
/// <summary>
/// Fills this block from the data array of a legacy <see cref="BlockF"/>.
/// </summary>
/// <param name="legacyBlock">The block to read the values from.</param>
internal void LoadFrom(ref BlockF legacyBlock)
{
    MutableSpan<float> values = legacyBlock.Data;
    this.LoadFrom(values);
}
/// <summary>
/// Writes the values of this block into the data array of a legacy <see cref="BlockF"/>.
/// </summary>
/// <param name="legacyBlock">The block whose data array receives the values.</param>
internal void CopyTo(ref BlockF legacyBlock)
{
    float[] target = legacyBlock.Data;
    this.CopyTo(target);
}
/// <summary>
/// Converts a sample to a byte: values below -128 give 0, values above 127 give 255, and
/// everything else is shifted by +128 and cast to byte.
/// </summary>
/// <param name="c">The sample value.</param>
/// <returns>The resulting byte.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static byte ToColorByte(float c)
{
    if (c < -128)
    {
        return 0;
    }

    if (c > 127)
    {
        return 255;
    }

    float shifted = c + 128;
    return (byte)shifted;
}
/// <summary>
/// Converts every sample of this block to a byte and writes the 8x8 result into
/// <paramref name="buffer"/>, one block row per <paramref name="stride"/> bytes.
/// Samples below -128 give 0, samples above 127 give 255, all others are shifted by +128.
/// </summary>
/// <param name="buffer">The destination; writing starts at the span's current offset.</param>
/// <param name="stride">The distance in bytes between the starts of two destination rows.</param>
internal unsafe void CopyColorsTo(MutableSpan<byte> buffer, int stride)
{
    fixed (Block8x8F* self = &this)
    {
        float* samples = (float*)self;
        for (int row = 0; row < 8; row++)
        {
            float* line = samples + (row * 8);
            int target = row * stride;

            for (int col = 0; col < 8; col++)
            {
                float sample = line[col];
                float level;
                if (sample > 127)
                {
                    level = 255;
                }
                else if (sample < -128)
                {
                    level = 0;
                }
                else
                {
                    level = sample + 128;
                }

                buffer[target + col] = (byte)level;
            }
        }
    }
}
// Lower bound, upper bound and offset used by ColorifyInto, each broadcast to all four lanes:
// samples are limited to [-128, 127] and then shifted by +128.
private static readonly Vector4 CMin4 = new Vector4(-128f);
private static readonly Vector4 CMax4 = new Vector4(127f);
private static readonly Vector4 COff4 = new Vector4(128f);
/// <summary>
/// Level shift by +128, clip to [0, 255], and write to buffer.
/// The clamped and shifted samples are first produced in <paramref name="temp"/> and then cast
/// to bytes row by row.
/// </summary>
/// <param name="buffer">The destination; writing starts at the span's current offset.</param>
/// <param name="stride">The distance in bytes between the starts of two destination rows.</param>
/// <param name="temp">Scratch block that receives the clamped and shifted samples; it is overwritten.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal unsafe void CopyColorsTo(
    MutableSpan<byte> buffer,
    int stride,
    Block8x8F* temp)
{
    this.ColorifyInto(ref *temp);

    float* row = (float*)temp;
    float* end = row + (8 * 8);

    // 'buffer' is a local copy of the span, so advancing its offset does not affect the caller.
    while (row < end)
    {
        buffer[0] = (byte)row[0];
        buffer[1] = (byte)row[1];
        buffer[2] = (byte)row[2];
        buffer[3] = (byte)row[3];
        buffer[4] = (byte)row[4];
        buffer[5] = (byte)row[5];
        buffer[6] = (byte)row[6];
        buffer[7] = (byte)row[7];

        buffer.AddOffset(stride);
        row += 8;
    }
}
}
}

95
src/ImageSharp/Formats/Jpg/Components/MutableSpan.cs

@ -0,0 +1,95 @@
using System.Buffers;
using System.Numerics;
using System.Runtime.CompilerServices;
namespace ImageSharp.Formats
{
/// <summary>
/// A view over an array that begins at a movable offset.
/// Like corefxlab Span, but with an AddOffset() method for efficiency.
/// TODO: When Span will be official, consider replacing this class!
/// </summary>
/// <remarks>
/// See <see href="https://github.com/dotnet/corefxlab/blob/master/src/System.Slices/System/Span.cs"/>.
/// </remarks>
/// <typeparam name="T">The element type of the underlying array.</typeparam>
internal struct MutableSpan<T>
{
    /// <summary>
    /// The underlying array.
    /// </summary>
    public T[] Data;

    /// <summary>
    /// The index inside <see cref="Data"/> at which this span starts.
    /// </summary>
    public int Offset;

    /// <summary>
    /// Initializes a span over an existing array.
    /// </summary>
    /// <param name="data">The array to wrap.</param>
    /// <param name="offset">The start index inside <paramref name="data"/>.</param>
    public MutableSpan(T[] data, int offset = 0)
    {
        this.Data = data;
        this.Offset = offset;
    }

    /// <summary>
    /// Initializes a span over a newly allocated array.
    /// </summary>
    /// <param name="size">The length of the array to allocate.</param>
    /// <param name="offset">The start index inside the new array.</param>
    public MutableSpan(int size, int offset = 0)
    {
        this.Data = new T[size];
        this.Offset = offset;
    }

    /// <summary>
    /// Gets the number of elements between <see cref="Offset"/> and the end of <see cref="Data"/>.
    /// </summary>
    public int TotalCount
    {
        get { return this.Data.Length - this.Offset; }
    }

    /// <summary>
    /// Gets or sets the element at the given position relative to <see cref="Offset"/>.
    /// </summary>
    /// <param name="idx">The position relative to the start of the span.</param>
    /// <returns>The element at that position.</returns>
    public T this[int idx]
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            return this.Data[this.Offset + idx];
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        set
        {
            this.Data[this.Offset + idx] = value;
        }
    }

    /// <summary>
    /// Wraps a whole array in a span that starts at index 0.
    /// </summary>
    /// <param name="data">The array to wrap.</param>
    public static implicit operator MutableSpan<T>(T[] data) => new MutableSpan<T>(data, 0);

    /// <summary>
    /// Creates a span over the same array that starts <paramref name="offset"/> elements after
    /// the start of this span.
    /// </summary>
    /// <param name="offset">The number of elements to skip.</param>
    /// <returns>The new span.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public MutableSpan<T> Slice(int offset)
    {
        int start = this.Offset + offset;
        return new MutableSpan<T>(this.Data, start);
    }

    /// <summary>
    /// Moves the start of this span by <paramref name="offset"/> elements.
    /// </summary>
    /// <param name="offset">The number of elements to move by.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void AddOffset(int offset)
    {
        this.Offset = this.Offset + offset;
    }
}
/// <summary>
/// Helper methods for creating spans from arrays and for moving four values at a time between a
/// span and a <see cref="Vector4"/>.
/// </summary>
internal static class MutableSpanExtensions
{
    /// <summary>
    /// Creates a span over <paramref name="array"/> that starts at <paramref name="offset"/>.
    /// </summary>
    /// <typeparam name="T">The element type of the array.</typeparam>
    /// <param name="array">The array to wrap.</param>
    /// <param name="offset">The start index inside the array.</param>
    /// <returns>The new span.</returns>
    public static MutableSpan<T> Slice<T>(this T[] array, int offset) => new MutableSpan<T>(array, offset);

    /// <summary>
    /// Stores the first four values of the span into the components of <paramref name="v"/>.
    /// </summary>
    /// <param name="data">The span to read from.</param>
    /// <param name="v">The vector to fill.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void SaveTo(this MutableSpan<float> data, ref Vector4 v)
    {
        float[] values = data.Data;
        int start = data.Offset;

        v.X = values[start];
        v.Y = values[start + 1];
        v.Z = values[start + 2];
        v.W = values[start + 3];
    }

    /// <summary>
    /// Stores the first four values of the span, converted to float, into the components of
    /// <paramref name="v"/>.
    /// </summary>
    /// <param name="data">The span to read from.</param>
    /// <param name="v">The vector to fill.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void SaveTo(this MutableSpan<int> data, ref Vector4 v)
    {
        int[] values = data.Data;
        int start = data.Offset;

        v.X = values[start];
        v.Y = values[start + 1];
        v.Z = values[start + 2];
        v.W = values[start + 3];
    }

    /// <summary>
    /// Writes the components of <paramref name="v"/> into the first four positions of the span.
    /// </summary>
    /// <param name="data">The span to write to.</param>
    /// <param name="v">The vector to read.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void LoadFrom(this MutableSpan<float> data, ref Vector4 v)
    {
        float[] values = data.Data;
        int start = data.Offset;

        values[start] = v.X;
        values[start + 1] = v.Y;
        values[start + 2] = v.Z;
        values[start + 3] = v.W;
    }

    /// <summary>
    /// Writes the components of <paramref name="v"/>, cast to int, into the first four positions
    /// of the span.
    /// </summary>
    /// <param name="data">The span to write to.</param>
    /// <param name="v">The vector to read.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void LoadFrom(this MutableSpan<int> data, ref Vector4 v)
    {
        int[] values = data.Data;
        int start = data.Offset;

        values[start] = (int)v.X;
        values[start + 1] = (int)v.Y;
        values[start + 2] = (int)v.Z;
        values[start + 3] = (int)v.W;
    }
}
}

265
src/ImageSharp/Formats/Jpg/JpegDecoderCore.cs

@ -3,6 +3,7 @@
// Licensed under the Apache License, Version 2.0.
// </copyright>
using System.Diagnostics;
using System.Runtime.CompilerServices;
namespace ImageSharp.Formats
@ -14,7 +15,7 @@ namespace ImageSharp.Formats
/// <summary>
/// Performs the jpeg decoding operation.
/// </summary>
internal class JpegDecoderCore : IDisposable
internal unsafe class JpegDecoderCore : IDisposable
{
/// <summary>
/// The maximum (inclusive) number of bits in a Huffman code.
@ -84,7 +85,7 @@ namespace ImageSharp.Formats
/// <summary>
/// Saved state between progressive-mode scans.
/// </summary>
private readonly Block[][] progCoeffs;
private readonly Block8x8F[][] progCoeffs;
/// <summary>
/// The huffman trees
@ -96,7 +97,7 @@ namespace ImageSharp.Formats
/// <summary>
/// Quantization tables, in zigzag order.
/// </summary>
private readonly Block[] quantizationTables;
private readonly Block8x8F[] quantizationTables;
/// <summary>
/// A temporary buffer for holding pixels
@ -201,12 +202,12 @@ namespace ImageSharp.Formats
public JpegDecoderCore()
{
//this.huffmanTrees = new Huffman[MaxTc + 1, MaxTh + 1];
this.huffmanTrees = new Huffman[(MaxTc + 1)*(MaxTh + 1)];
this.huffmanTrees = new Huffman[(MaxTc + 1) * (MaxTh + 1)];
this.quantizationTables = Block.CreateArray(MaxTq + 1);
this.temp = new byte[2 * Block.BlockSize];
this.quantizationTables = new Block8x8F[MaxTq + 1];
this.temp = new byte[2 * BlockF.BlockSize];
this.componentArray = new Component[MaxComponents];
this.progCoeffs = new Block[MaxComponents][];
this.progCoeffs = new Block8x8F[MaxComponents][];
this.bits = new Bits();
this.bytes = new Bytes();
@ -216,21 +217,9 @@ namespace ImageSharp.Formats
{
for (int j = 0; j < MaxTh + 1; j++)
{
//this.huffmanTrees[i, j].Init(LutSize, MaxNCodes, MaxCodeLength);
this.huffmanTrees[i* ThRowSize + j].Init(LutSize, MaxNCodes, MaxCodeLength);
this.huffmanTrees[i * ThRowSize + j].Init(LutSize, MaxNCodes, MaxCodeLength);
}
}
//for (int i = 0; i < this.quantizationTables.Length; i++)
//{
// //this.quantizationTables[i] = new Block();
// this.quantizationTables[i].Init();
//}
//for (int i = 0; i < this.componentArray.Length; i++)
//{
// this.componentArray[i] = new Component();
//}
}
@ -515,7 +504,7 @@ namespace ImageSharp.Formats
throw new ImageFormatException("Bad Th value");
}
ProcessDefineHuffmanTablesMarkerLoop(ref this.huffmanTrees[tc* ThRowSize + th], ref remaining);
ProcessDefineHuffmanTablesMarkerLoop(ref this.huffmanTrees[tc * ThRowSize + th], ref remaining);
}
}
@ -571,8 +560,8 @@ namespace ImageSharp.Formats
// whose codeLength's high bits matches code.
// The high 8 bits of lutValue are the encoded value.
// The low 8 bits are 1 plus the codeLength.
byte base2 = (byte) (code << (7 - i));
ushort lutValue = (ushort) ((huffman.Values[x] << 8) | (2 + i));
byte base2 = (byte)(code << (7 - i));
ushort lutValue = (ushort)((huffman.Values[x] << 8) | (2 + i));
for (int k = 0; k < 1 << (7 - i); k++)
{
@ -1117,32 +1106,32 @@ namespace ImageSharp.Formats
switch (x >> 4)
{
case 0:
if (remaining < Block.BlockSize)
if (remaining < BlockF.BlockSize)
{
done = true;
break;
}
remaining -= Block.BlockSize;
this.ReadFull(this.temp, 0, Block.BlockSize);
remaining -= BlockF.BlockSize;
this.ReadFull(this.temp, 0, BlockF.BlockSize);
for (int i = 0; i < Block.BlockSize; i++)
for (int i = 0; i < BlockF.BlockSize; i++)
{
this.quantizationTables[tq][i] = this.temp[i];
}
break;
case 1:
if (remaining < 2 * Block.BlockSize)
if (remaining < 2 * BlockF.BlockSize)
{
done = true;
break;
}
remaining -= 2 * Block.BlockSize;
this.ReadFull(this.temp, 0, 2 * Block.BlockSize);
remaining -= 2 * BlockF.BlockSize;
this.ReadFull(this.temp, 0, 2 * BlockF.BlockSize);
for (int i = 0; i < Block.BlockSize; i++)
for (int i = 0; i < BlockF.BlockSize; i++)
{
this.quantizationTables[tq][i] = (this.temp[2 * i] << 8) | this.temp[(2 * i) + 1];
}
@ -1471,7 +1460,7 @@ namespace ImageSharp.Formats
}
}
private Block scanWorkerBlock = Block.Create();
private BlockF scanWorkerBlock = BlockF.Create();
/// <summary>
/// Processes the SOS (Start of scan marker).
@ -1535,7 +1524,7 @@ namespace ImageSharp.Formats
// significant bit.
// For baseline JPEGs, these parameters are hard-coded to 0/63/0/0.
int zigStart = 0;
int zigEnd = Block.BlockSize - 1;
int zigEnd = BlockF.BlockSize - 1;
int ah = 0;
int al = 0;
@ -1546,7 +1535,7 @@ namespace ImageSharp.Formats
ah = this.temp[3 + scanComponentCountX2] >> 4;
al = this.temp[3 + scanComponentCountX2] & 0x0f;
if ((zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || Block.BlockSize <= zigEnd)
if ((zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || BlockF.BlockSize <= zigEnd)
{
throw new ImageFormatException("Bad spectral selection bounds");
}
@ -1580,12 +1569,9 @@ namespace ImageSharp.Formats
int compIndex = scan[i].Index;
if (this.progCoeffs[compIndex] == null)
{
this.progCoeffs[compIndex] = Block.CreateArray(mxx * myy * this.componentArray[compIndex].HorizontalFactor * this.componentArray[compIndex].VerticalFactor);
var size = mxx * myy * this.componentArray[compIndex].HorizontalFactor * this.componentArray[compIndex].VerticalFactor;
for (int j = 0; j < this.progCoeffs[compIndex].Length; j++)
{
this.progCoeffs[compIndex][j].Init();
}
this.progCoeffs[compIndex] = new Block8x8F[size];
}
}
}
@ -1603,6 +1589,10 @@ namespace ImageSharp.Formats
// blocks: the third block in the first row has (bx, by) = (2, 0).
int bx, by, blockCount = 0;
Block8x8F b = new Block8x8F();
Block8x8F temp1 = new Block8x8F();
Block8x8F temp2 = new Block8x8F();
for (int my = 0; my < myy; my++)
{
for (int mx = 0; mx < mxx; mx++)
@ -1612,7 +1602,7 @@ namespace ImageSharp.Formats
int compIndex = scan[i].Index;
int hi = this.componentArray[compIndex].HorizontalFactor;
int vi = this.componentArray[compIndex].VerticalFactor;
for (int j = 0; j < hi * vi; j++)
{
@ -1656,27 +1646,39 @@ namespace ImageSharp.Formats
}
var qtIndex = this.componentArray[compIndex].Selector;
if (this.isProgressive) // Load the previous partially decoded coefficients, if applicable.
{
blockIndex = ((@by * mxx) * hi) + bx;
ProcessBlockImpl(ah,
ref this.progCoeffs[compIndex][blockIndex],
scan, i, zigStart, zigEnd, al, dc, compIndex, @by, mxx, hi, bx,
ref this.quantizationTables[qtIndex]
);
}
else
// TODO: Find a way to clean up this mess
fixed (Block8x8F* qtp = &this.quantizationTables[qtIndex])
{
//var b = Block.Create();
scanWorkerBlock.Clear();
ProcessBlockImpl(ah, ref scanWorkerBlock, scan, i, zigStart, zigEnd, al, dc, compIndex, @by, mxx, hi,
bx, ref this.quantizationTables[qtIndex]
);
//b.Dispose();
if (this.isProgressive) // Load the previous partially decoded coefficients, if applicable.
{
blockIndex = ((@by * mxx) * hi) + bx;
fixed (Block8x8F* bp = &this.progCoeffs[compIndex][blockIndex])
{
ProcessBlockImpl(ah,
bp,
&temp1,
&temp2,
scan, i, zigStart, zigEnd, al, dc, compIndex, @by, mxx, hi, bx,
qtp
);
}
}
else
{
b.Clear();
ProcessBlockImpl(ah,
&b,
&temp1,
&temp2,
scan, i, zigStart, zigEnd, al, dc, compIndex, @by, mxx, hi,
bx, qtp
);
}
}
}
// for j
@ -1718,12 +1720,19 @@ namespace ImageSharp.Formats
// for my
}
private void ProcessBlockImpl(int ah, ref Block b, Scan[] scan, int i, int zigStart, int zigEnd, int al,
int[] dc, int compIndex, int @by, int mxx, int hi, int bx, ref Block qt)
private void ProcessBlockImpl(
int ah,
Block8x8F* b,
Block8x8F* temp1,
Block8x8F* temp2,
Scan[] scan,
int i, int zigStart, int zigEnd, int al,
int[] dc, int compIndex, int @by, int mxx, int hi, int bx,
Block8x8F* qt)
{
if (ah != 0)
{
this.Refine(ref b, ref this.huffmanTrees[AcTable * ThRowSize + scan[i].AcTableSelector], zigStart, zigEnd, 1 << al);
this.Refine(b, ref this.huffmanTrees[AcTable * ThRowSize + scan[i].AcTableSelector], zigStart, zigEnd, 1 << al);
}
else
{
@ -1741,7 +1750,9 @@ namespace ImageSharp.Formats
int deltaDC = this.ReceiveExtend(value);
dc[compIndex] += deltaDC;
b[0] = dc[compIndex] << al;
//b[0] = dc[compIndex] << al;
Block8x8F.SetScalarAt(b, 0, dc[compIndex] << al);
}
if (zig <= zigEnd && this.eobRun > 0)
@ -1755,8 +1766,8 @@ namespace ImageSharp.Formats
for (; zig <= zigEnd; zig++)
{
byte value = this.DecodeHuffman(ref this.huffmanTrees[AcTable * ThRowSize + scan[i].AcTableSelector]);
byte val0 = (byte) (value >> 4);
byte val1 = (byte) (value & 0x0f);
byte val0 = (byte)(value >> 4);
byte val1 = (byte)(value & 0x0f);
if (val1 != 0)
{
zig += val0;
@ -1766,16 +1777,18 @@ namespace ImageSharp.Formats
}
int ac = this.ReceiveExtend(val1);
b[Unzig[zig]] = ac << al;
//b[Unzig[zig]] = ac << al;
Block8x8F.SetScalarAt(b, Unzig[zig], ac << al);
}
else
{
if (val0 != 0x0f)
{
this.eobRun = (ushort) (1 << val0);
this.eobRun = (ushort)(1 << val0);
if (val0 != 0)
{
this.eobRun |= (ushort) this.DecodeBits(val0);
this.eobRun |= (ushort)this.DecodeBits(val0);
}
this.eobRun--;
@ -1790,11 +1803,14 @@ namespace ImageSharp.Formats
if (this.isProgressive)
{
if (zigEnd != Block.BlockSize - 1 || al != 0)
if (zigEnd != BlockF.BlockSize - 1 || al != 0)
{
// We haven't completely decoded this 8x8 block. Save the coefficients.
this.progCoeffs[compIndex][((@by*mxx)*hi) + bx] = b.Clone();
// TODO!!!
//throw new NotImplementedException();
//this.progCoeffs[compIndex][((@by * mxx) * hi) + bx] = b.Clone();
this.progCoeffs[compIndex][((@by * mxx) * hi) + bx] = *b;
// At this point, we could execute the rest of the loop body to dequantize and
// perform the inverse DCT, to save early stages of a progressive image to the
@ -1806,22 +1822,23 @@ namespace ImageSharp.Formats
}
// Dequantize, perform the inverse DCT and store the block to the image.
for (int zig = 0; zig < Block.BlockSize; zig++)
for (int zig = 0; zig < BlockF.BlockSize; zig++)
{
b[Unzig[zig]] *= qt[zig];
// TODO: We really need the fancy new corefxlab Span<float> here ...
//b[Unzig[zig]] *= qt[zig];
int unzigIdx = Unzig[zig];
float value = Block8x8F.GetScalarAt(b, unzigIdx);
value *= Block8x8F.GetScalarAt(qt, zig);
Block8x8F.SetScalarAt(b, unzigIdx, value);
}
IDCT.Transform(ref b);
// ******* Other experimental variants: *************
// FluxJpeg:
// https://github.com/antonfirsov/ImageSharp/blob/master/src/ImageSharp46/Formats/Jpg/Components/FloatIDCT.cs
// FloatIDCT.Transform(ref b);
// SIMD-based:
// https://github.com/antonfirsov/ImageSharp/blob/master/src/ImageSharp46/Formats/Jpg/Components/MagicDCT.cs
// MagicDCT.IDCT(ref b);
//IDCT.Transform(ref b);
//FloatIDCT.Transform(ref b);
//ReferenceDCT.IDCT(ref b);
//Block8x8F.SuchIDCT(ref b);
//b->IDCTInplace();
b->IDCTInto(ref *temp1, ref *temp2);
byte[] dst;
int offset;
@ -1831,7 +1848,7 @@ namespace ImageSharp.Formats
{
dst = this.grayImage.Pixels;
stride = this.grayImage.Stride;
offset = this.grayImage.Offset + (8*((@by*this.grayImage.Stride) + bx));
offset = this.grayImage.Offset + (8 * ((@by * this.grayImage.Stride) + bx));
}
else
{
@ -1840,26 +1857,26 @@ namespace ImageSharp.Formats
case 0:
dst = this.ycbcrImage.YChannel;
stride = this.ycbcrImage.YStride;
offset = this.ycbcrImage.YOffset + (8*((@by*this.ycbcrImage.YStride) + bx));
offset = this.ycbcrImage.YOffset + (8 * ((@by * this.ycbcrImage.YStride) + bx));
break;
case 1:
dst = this.ycbcrImage.CbChannel;
stride = this.ycbcrImage.CStride;
offset = this.ycbcrImage.COffset + (8*((@by*this.ycbcrImage.CStride) + bx));
offset = this.ycbcrImage.COffset + (8 * ((@by * this.ycbcrImage.CStride) + bx));
break;
case 2:
dst = this.ycbcrImage.CrChannel;
stride = this.ycbcrImage.CStride;
offset = this.ycbcrImage.COffset + (8*((@by*this.ycbcrImage.CStride) + bx));
offset = this.ycbcrImage.COffset + (8 * ((@by * this.ycbcrImage.CStride) + bx));
break;
case 3:
dst = this.blackPixels;
stride = this.blackStride;
offset = 8*((@by*this.blackStride) + bx);
offset = 8 * ((@by * this.blackStride) + bx);
break;
default:
@ -1868,32 +1885,12 @@ namespace ImageSharp.Formats
}
// Level shift by +128, clip to [0, 255], and write to dst.
for (int y = 0; y < 8; y++)
{
int y8 = y*8;
int yStride = y*stride;
for (int x = 0; x < 8; x++)
{
int c = b[y8 + x];
if (c < -128)
{
c = 0;
}
else if (c > 127)
{
c = 255;
}
else
{
c += 128;
}
dst[yStride + x + offset] = (byte) c;
}
}
//temp1->CopyColorsPlz(new MutableSpan<byte>(dst, offset), stride);
temp1->CopyColorsTo(new MutableSpan<byte>(dst, offset), stride, temp2);
}
private void ProcessScanImpl(int i, ref Scan currentScan, Scan[] scan, ref int totalHv)
{
// Component selector.
@ -1934,15 +1931,15 @@ namespace ImageSharp.Formats
}
totalHv += currentComponent.HorizontalFactor*currentComponent.VerticalFactor;
totalHv += currentComponent.HorizontalFactor * currentComponent.VerticalFactor;
currentScan.DcTableSelector = (byte) (this.temp[2 + (2*i)] >> 4);
currentScan.DcTableSelector = (byte)(this.temp[2 + (2 * i)] >> 4);
if (currentScan.DcTableSelector > MaxTh)
{
throw new ImageFormatException("Bad DC table selector value");
}
currentScan.AcTableSelector = (byte) (this.temp[2 + (2*i)] & 0x0f);
currentScan.AcTableSelector = (byte)(this.temp[2 + (2 * i)] & 0x0f);
if (currentScan.AcTableSelector > MaxTh)
{
throw new ImageFormatException("Bad AC table selector value");
@ -1957,7 +1954,7 @@ namespace ImageSharp.Formats
/// <param name="zigStart">The zig-zag start index</param>
/// <param name="zigEnd">The zig-zag end index</param>
/// <param name="delta">The low transform offset</param>
private void Refine(ref Block b, ref Huffman h, int zigStart, int zigEnd, int delta)
private void Refine(Block8x8F* b, ref Huffman h, int zigStart, int zigEnd, int delta)
{
// Refining a DC component is trivial.
if (zigStart == 0)
@ -1970,7 +1967,12 @@ namespace ImageSharp.Formats
bool bit = this.DecodeBit();
if (bit)
{
b[0] |= delta;
int stuff = (int) Block8x8F.GetScalarAt(b, 0);
//int stuff = (int)b[0];
stuff |= delta;
//b[0] = stuff;
Block8x8F.SetScalarAt(b, 0, stuff);
}
return;
@ -2021,6 +2023,8 @@ namespace ImageSharp.Formats
break;
}
int blah = zig;
zig = this.RefineNonZeroes(b, zig, zigEnd, val0, delta);
if (zig > zigEnd)
{
@ -2029,7 +2033,8 @@ namespace ImageSharp.Formats
if (z != 0)
{
b[Unzig[zig]] = z;
//b[Unzig[zig]] = z;
Block8x8F.SetScalarAt(b, Unzig[zig], z);
}
}
}
@ -2051,12 +2056,15 @@ namespace ImageSharp.Formats
/// <param name="nz">The non-zero entry</param>
/// <param name="delta">The low transform offset</param>
/// <returns>The <see cref="int"/></returns>
private int RefineNonZeroes(Block b, int zig, int zigEnd, int nz, int delta)
private int RefineNonZeroes(Block8x8F* b, int zig, int zigEnd, int nz, int delta)
{
for (; zig <= zigEnd; zig++)
{
int u = Unzig[zig];
if (b[u] == 0)
float bu = Block8x8F.GetScalarAt(b, u);
// TODO: Are the equality comparisons OK with floating point values? Isn't an epsilon value necessary?
if (bu == 0)
{
if (nz == 0)
{
@ -2073,13 +2081,15 @@ namespace ImageSharp.Formats
continue;
}
if (b[u] >= 0)
if (bu >= 0)
{
b[u] += delta;
//b[u] += delta;
Block8x8F.SetScalarAt(b, u, bu + delta);
}
else
{
b[u] -= delta;
//b[u] -= delta;
Block8x8F.SetScalarAt(b, u, bu - delta);
}
}
@ -2271,15 +2281,6 @@ namespace ImageSharp.Formats
public void Dispose()
{
scanWorkerBlock.Dispose();
Block.DisposeAll(this.quantizationTables);
foreach (Block[] blocks in progCoeffs)
{
if (blocks != null)
{
Block.DisposeAll(blocks);
}
}
for (int i = 0; i < huffmanTrees.Length; i++)
{

511
tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

@ -0,0 +1,511 @@
// Uncomment this to turn unit tests into benchmarks:
//#define BENCHMARKING
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using ImageSharp.Formats;
using Xunit;
using Xunit.Abstractions;
// ReSharper disable InconsistentNaming
namespace ImageSharp.Tests.Formats.Jpg
{
public class Block8x8FTests : UtilityTestClassBase
{
#if BENCHMARKING
public const int Times = 1000000;
#else
public const int Times = 1;
#endif
/// <summary>
/// Initializes a new instance of the <see cref="Block8x8FTests"/> class.
/// </summary>
/// <param name="output">The xUnit output helper, forwarded to the base class.</param>
public Block8x8FTests(ITestOutputHelper output)
    : base(output)
{
}
/// <summary>
/// Writes the values 0, 1, 2, ... through the <see cref="Block8x8F"/> indexer,
/// reads them back through the same indexer and verifies their sum.
/// </summary>
[Fact]
public void Indexer()
{
    float sum = 0;
    Measure(Times, () =>
    {
        Block8x8F block = new Block8x8F();
        for (int i = 0; i < Block8x8F.ScalarCount; i++)
        {
            block[i] = i;
        }

        // Reset inside the measured action so repeated (benchmark) runs do not accumulate.
        sum = 0;
        for (int i = 0; i < Block8x8F.ScalarCount; i++)
        {
            sum += block[i];
        }
    });

    // xUnit's signature is Assert.Equal(expected, actual).
    // The expected value 0 + 1 + ... + 63 assumes ScalarCount is 64.
    Assert.Equal(64f * 63f * 0.5f, sum);
}
/// <summary>
/// Same round trip as the indexer test, but through the pointer-based
/// <c>Block8x8F.SetScalarAt</c> / <c>Block8x8F.GetScalarAt</c> helpers.
/// </summary>
[Fact]
public unsafe void Indexer_GetScalarAt_SetScalarAt()
{
    float sum = 0;
    Measure(Times, () =>
    {
        Block8x8F block = new Block8x8F();
        for (int i = 0; i < Block8x8F.ScalarCount; i++)
        {
            Block8x8F.SetScalarAt(&block, i, i);
        }

        // Reset inside the measured action so repeated (benchmark) runs do not accumulate.
        sum = 0;
        for (int i = 0; i < Block8x8F.ScalarCount; i++)
        {
            sum += Block8x8F.GetScalarAt(&block, i);
        }
    });

    // xUnit's signature is Assert.Equal(expected, actual).
    // The expected value 0 + 1 + ... + 63 assumes ScalarCount is 64.
    Assert.Equal(64f * 63f * 0.5f, sum);
}
/// <summary>
/// Baseline for the indexer tests: performs the same write/read/sum round trip
/// on a plain <c>float[]</c> so its timing can be compared with <see cref="Block8x8F"/>.
/// </summary>
[Fact]
public void Indexer_ReferenceBenchmarkWithArray()
{
    float sum = 0;
    Measure(Times, () =>
    {
        // Sized by ScalarCount so the array always matches the loop bounds below.
        float[] block = new float[Block8x8F.ScalarCount];
        for (int i = 0; i < Block8x8F.ScalarCount; i++)
        {
            block[i] = i;
        }

        // Reset inside the measured action so repeated (benchmark) runs do not accumulate.
        sum = 0;
        for (int i = 0; i < Block8x8F.ScalarCount; i++)
        {
            sum += block[i];
        }
    });

    // xUnit's signature is Assert.Equal(expected, actual).
    // The expected value 0 + 1 + ... + 63 assumes ScalarCount is 64.
    Assert.Equal(64f * 63f * 0.5f, sum);
}
/// <summary>
/// Loads a float array into a <see cref="Block8x8F"/>, copies it back out
/// and expects the copy to equal the input.
/// </summary>
[Fact]
public void Load_Store_FloatArray()
{
    float[] source = new float[Block8x8F.ScalarCount];
    float[] roundTripped = new float[Block8x8F.ScalarCount];

    int index = 0;
    while (index < Block8x8F.ScalarCount)
    {
        source[index] = index;
        index++;
    }

    Measure(Times, () =>
    {
        Block8x8F block = new Block8x8F();
        block.LoadFrom(source);
        block.CopyTo(roundTripped);
    });

    Assert.Equal(source, roundTripped);
}
/// <summary>
/// Float array round trip through the static, pointer-taking
/// <c>Block8x8F.LoadFrom</c> / <c>Block8x8F.CopyTo</c> overloads.
/// </summary>
[Fact]
public unsafe void Load_Store_FloatArray_Ptr()
{
    float[] source = new float[Block8x8F.ScalarCount];
    float[] roundTripped = new float[Block8x8F.ScalarCount];

    int index = 0;
    while (index < Block8x8F.ScalarCount)
    {
        source[index] = index;
        index++;
    }

    Measure(Times, () =>
    {
        Block8x8F block = new Block8x8F();
        Block8x8F* blockPtr = &block;
        Block8x8F.LoadFrom(blockPtr, source);
        Block8x8F.CopyTo(blockPtr, roundTripped);
    });

    Assert.Equal(source, roundTripped);
}
/// <summary>
/// Loads an int array into a <see cref="Block8x8F"/>, copies it back out into
/// another int array and expects the copy to equal the input.
/// </summary>
[Fact]
public void Load_Store_IntArray()
{
    int[] source = new int[Block8x8F.ScalarCount];
    int[] roundTripped = new int[Block8x8F.ScalarCount];

    int index = 0;
    while (index < Block8x8F.ScalarCount)
    {
        source[index] = index;
        index++;
    }

    Measure(Times, () =>
    {
        Block8x8F block = new Block8x8F();
        block.LoadFrom(source);
        block.CopyTo(roundTripped);
    });

    Assert.Equal(source, roundTripped);
}
/// <summary>
/// Compares <c>Block8x8F.TransposeInplace</c> with the reference in-place transpose.
/// </summary>
[Fact]
public void TransposeInplace()
{
    // Reference result, computed on a separate copy of the same test data.
    float[] expected = Create8x8FloatData();
    ReferenceImplementations.Transpose8x8(expected);

    Block8x8F block = new Block8x8F();
    block.LoadFrom(Create8x8FloatData());
    block.TransposeInplace();

    float[] actual = new float[64];
    block.CopyTo(actual);

    Assert.Equal(expected, actual);
}
/// <summary>
/// Compares <c>Block8x8F.TransposeInto_PinningImpl</c> with the reference in-place transpose.
/// </summary>
[Fact]
public void TranposeInto_PinningImpl()
{
    // Reference result, computed on a separate copy of the same test data.
    float[] expected = Create8x8FloatData();
    ReferenceImplementations.Transpose8x8(expected);

    Block8x8F input = new Block8x8F();
    input.LoadFrom(Create8x8FloatData());

    Block8x8F transposed = new Block8x8F();
    input.TransposeInto_PinningImpl(ref transposed);

    float[] actual = new float[64];
    transposed.CopyTo(actual);

    Assert.Equal(expected, actual);
}
/// <summary>
/// Compares <c>Block8x8F.TransposeInto</c> (ref overload) with the reference in-place transpose.
/// </summary>
[Fact]
public void TransposeInto()
{
    // Reference result, computed on a separate copy of the same test data.
    float[] expected = Create8x8FloatData();
    ReferenceImplementations.Transpose8x8(expected);

    Block8x8F input = new Block8x8F();
    input.LoadFrom(Create8x8FloatData());

    Block8x8F transposed = new Block8x8F();
    input.TransposeInto(ref transposed);

    float[] actual = new float[64];
    transposed.CopyTo(actual);

    Assert.Equal(expected, actual);
}
/// <summary>
/// Asserts nothing: writes 8 lines of 8 generated assignment statements
/// (one per element of an 8x8 transpose) to the test output.
/// </summary>
[Fact]
public void TransposeInto_CodeGeneratorTest()
{
    const string Coordinates = "XYZW";
    StringBuilder code = new StringBuilder();

    for (int row = 0; row < 8; row++)
    {
        // Indices 0-3 map to the 'L' half, 4-7 to the 'R' half.
        char destCoord = Coordinates[row % 4];
        char destSide = row < 4 ? 'L' : 'R';

        for (int col = 0; col < 8; col++)
        {
            char srcCoord = Coordinates[col % 4];
            char srcSide = col < 4 ? 'L' : 'R';

            string statement = $"d.V{col}{destSide}.{destCoord} = V{row}{srcSide}.{srcCoord}; ";
            code.Append(statement);
        }

        code.AppendLine();
    }

    Output.WriteLine(code.ToString());
}
/// <summary>
/// Compares the static, pointer-based <c>Block8x8F.TransposeInto</c> with the reference in-place transpose.
/// </summary>
[Fact]
public unsafe void TransposeInto_WithPointers()
{
    // Reference result, computed on a separate copy of the same test data.
    float[] expected = Create8x8FloatData();
    ReferenceImplementations.Transpose8x8(expected);

    Block8x8F input = new Block8x8F();
    input.LoadFrom(Create8x8FloatData());
    Block8x8F transposed = new Block8x8F();

    // Both blocks are locals, so their addresses can be taken directly.
    Block8x8F.TransposeInto(&input, &transposed);

    float[] actual = new float[64];
    transposed.CopyTo(actual);

    Assert.Equal(expected, actual);
}
/// <summary>
/// Reference-type wrapper around a single <see cref="Block8x8F"/> field.
/// The benchmark tests in this class use instances of it as the source and destination
/// blocks; the pointer benchmark pins the field with a <c>fixed</c> statement.
/// </summary>
private class BufferHolder
{
public Block8x8F Buffer;
}
/// <summary>
/// Times <see cref="Times"/> calls of <c>Block8x8F.TransposeInto</c> (ref overload)
/// and writes the elapsed milliseconds to the test output. Asserts nothing.
/// </summary>
[Fact]
public void TranposeInto_Benchmark()
{
    BufferHolder source = new BufferHolder();
    source.Buffer.LoadFrom(Create8x8FloatData());
    BufferHolder dest = new BufferHolder();

    // The label names this benchmark; the earlier text was copied from the
    // PinningImpl benchmark and made the two outputs indistinguishable.
    Output.WriteLine($"TranposeInto_Benchmark X {Times} ...");
    Stopwatch sw = Stopwatch.StartNew();

    // Timed with an explicit loop rather than Measure() so no delegate call is included.
    for (int i = 0; i < Times; i++)
    {
        source.Buffer.TransposeInto(ref dest.Buffer);
    }

    sw.Stop();
    Output.WriteLine($"TranposeInto_Benchmark finished in {sw.ElapsedMilliseconds} ms");
}
/// <summary>
/// Times <see cref="Times"/> calls of <c>Block8x8F.TransposeInto_PinningImpl</c>
/// and writes the elapsed milliseconds to the test output. Asserts nothing.
/// </summary>
[Fact]
public void TranposeInto_PinningImpl_Benchmark()
{
    BufferHolder input = new BufferHolder();
    BufferHolder output = new BufferHolder();
    input.Buffer.LoadFrom(Create8x8FloatData());

    Output.WriteLine($"TranposeInto_PinningImpl_Benchmark X {Times} ...");

    Stopwatch timer = Stopwatch.StartNew();
    int iteration = 0;
    while (iteration < Times)
    {
        input.Buffer.TransposeInto_PinningImpl(ref output.Buffer);
        iteration++;
    }

    timer.Stop();

    Output.WriteLine($"TranposeInto_PinningImpl_Benchmark finished in {timer.ElapsedMilliseconds} ms");
}
/// <summary>
/// Times <see cref="Times"/> calls of the static, pointer-based <c>Block8x8F.TransposeInto</c>
/// and writes the elapsed milliseconds to the test output. Asserts nothing.
/// </summary>
[Fact]
public unsafe void TransposeInto_WithPointers_Benchmark()
{
    BufferHolder input = new BufferHolder();
    input.Buffer.LoadFrom(Create8x8FloatData());
    BufferHolder output = new BufferHolder();

    // The blocks are fields of heap objects, so they are pinned for the duration of the timing loop.
    fixed (Block8x8F* sPtr = &input.Buffer)
    fixed (Block8x8F* dPtr = &output.Buffer)
    {
        Output.WriteLine($"TransposeInto_WithPointers_Benchmark X {Times} ...");

        Stopwatch timer = Stopwatch.StartNew();
        int iteration = 0;
        while (iteration < Times)
        {
            Block8x8F.TransposeInto(sPtr, dPtr);
            iteration++;
        }

        timer.Stop();

        Output.WriteLine($"TransposeInto_WithPointers_Benchmark finished in {timer.ElapsedMilliseconds} ms");
    }
}
/// <summary>
/// Compares <c>Block8x8F.iDCT2D8x4_LeftPart</c> with
/// <c>ReferenceImplementations.iDCT2D8x4_32f</c> applied at offset 0 of the same data.
/// Both results are printed before the exact comparison.
/// </summary>
[Fact]
public void iDCT2D8x4_LeftPart()
{
    float[] input = Create8x8FloatData();
    float[] expected = new float[64];
    ReferenceImplementations.iDCT2D8x4_32f(input, expected);

    Block8x8F sourceBlock = new Block8x8F();
    sourceBlock.LoadFrom(input);
    Block8x8F destBlock = new Block8x8F();
    sourceBlock.iDCT2D8x4_LeftPart(ref destBlock);

    float[] actual = new float[64];
    destBlock.CopyTo(actual);

    Print8x8Data(expected);
    Output.WriteLine("**************");
    Print8x8Data(actual);

    Assert.Equal(expected, actual);
}
/// <summary>
/// Compares <c>Block8x8F.iDCT2D8x4_RightPart</c> with
/// <c>ReferenceImplementations.iDCT2D8x4_32f</c> applied to the spans sliced at offset 4.
/// Both results are printed before the exact comparison.
/// </summary>
[Fact]
public void iDCT2D8x4_RightPart()
{
    MutableSpan<float> input = Create8x8FloatData();
    MutableSpan<float> expected = new float[64];

    // The reference routine is pointed at the slices starting 4 floats in.
    MutableSpan<float> inputRight = input.Slice(4);
    MutableSpan<float> expectedRight = expected.Slice(4);
    ReferenceImplementations.iDCT2D8x4_32f(inputRight, expectedRight);

    Block8x8F sourceBlock = new Block8x8F();
    sourceBlock.LoadFrom(input);
    Block8x8F destBlock = new Block8x8F();
    sourceBlock.iDCT2D8x4_RightPart(ref destBlock);

    float[] actual = new float[64];
    destBlock.CopyTo(actual);

    Print8x8Data(expected);
    Output.WriteLine("**************");
    Print8x8Data(actual);

    Assert.Equal(expected.Data, actual);
}
/// <summary>
/// Equality comparer that treats two floats as equal when their difference
/// is strictly smaller in magnitude than <see cref="Eps"/>.
/// </summary>
private struct ApproximateFloatComparer : IEqualityComparer<float>
{
    private const float Eps = 0.0001f;

    /// <summary>
    /// Returns true when |x - y| is below the tolerance. A NaN difference never compares equal.
    /// </summary>
    public bool Equals(float x, float y)
    {
        return Math.Abs(x - y) < Eps;
    }

    /// <summary>
    /// Not supported: always throws <see cref="InvalidOperationException"/>.
    /// </summary>
    public int GetHashCode(float obj)
    {
        throw new InvalidOperationException();
    }
}
/// <summary>
/// Compares <c>Block8x8F.IDCTInto</c> with <c>ReferenceImplementations.iDCT2D_llm</c>
/// on the same input, using <see cref="ApproximateFloatComparer"/> for the element comparison.
/// Both results are printed before the comparison.
/// </summary>
[Fact]
public void IDCTInto()
{
    float[] sourceArray = Create8x8FloatData();
    float[] expectedDestArray = new float[64];
    float[] tempArray = new float[64];

    ReferenceImplementations.iDCT2D_llm(sourceArray, expectedDestArray, tempArray);

    Block8x8F source = new Block8x8F();
    source.LoadFrom(sourceArray);

    Block8x8F dest = new Block8x8F();
    Block8x8F tempBuffer = new Block8x8F();

    source.IDCTInto(ref dest, ref tempBuffer);

    float[] actualDestArray = new float[64];
    dest.CopyTo(actualDestArray);

    Print8x8Data(expectedDestArray);
    Output.WriteLine("**************");
    Print8x8Data(actualDestArray);

    // A single approximate comparison is enough; the assertion used to be executed twice.
    Assert.Equal(expectedDestArray, actualDestArray, new ApproximateFloatComparer());
}
/// <summary>
/// Compares <c>Block8x8F.CopyColorsTo</c> with <c>ReferenceImplementations.CopyColorsTo</c>
/// when writing into byte buffers at the same offset and stride.
/// </summary>
[Fact]
public unsafe void CopyColorsTo()
{
    float[] input = Create8x8FloatData();
    Block8x8F block = new Block8x8F();
    block.LoadFrom(input);

    Vector4 scale = new Vector4(5f);
    block.MultiplyAllInplace(scale);

    const int Stride = 256;
    const int Height = 42;

    // NOTE(review): the offset is derived from the height, not the stride - confirm this is intended.
    int offset = Height * 10 + 20;

    byte[] expectedColors = new byte[Stride * Height];
    byte[] actualColors = new byte[Stride * Height];

    Block8x8F scratch = new Block8x8F();

    MutableSpan<byte> expectedTarget = new MutableSpan<byte>(expectedColors, offset);
    ReferenceImplementations.CopyColorsTo(ref block, expectedTarget, Stride);

    MutableSpan<byte> actualTarget = new MutableSpan<byte>(actualColors, offset);
    block.CopyColorsTo(actualTarget, Stride, &scratch);

    Assert.Equal(expectedColors, actualColors);
}
/// <summary>
/// Calls <c>Block8x8F.CropInto(10, 20, ...)</c> and checks that every value
/// in the destination block lies in the range [10, 20].
/// </summary>
[Fact]
public void CropInto()
{
    Block8x8F source = new Block8x8F();
    source.LoadFrom(Create8x8FloatData());

    Block8x8F cropped = new Block8x8F();
    source.CropInto(10, 20, ref cropped);

    float[] values = new float[64];
    cropped.CopyTo(values);
    PrintLinearData(values);

    for (int i = 0; i < values.Length; i++)
    {
        float val = values[i];
        Assert.InRange(val, 10, 20);
    }
}
/// <summary>
/// Builds 64 test values in row-major 8x8 order where element (row, column) is
/// <c>-300 + row * 100 + column * 10</c>, i.e. values from -300 up to 470.
/// </summary>
/// <returns>A new 64-element array.</returns>
private static float[] Create8x8ColorCropTestData()
{
    float[] values = new float[64];

    for (int index = 0; index < values.Length; index++)
    {
        int row = index / 8;
        int column = index % 8;
        values[index] = -300 + (row * 100) + (column * 10);
    }

    return values;
}
/// <summary>
/// Calls <c>Block8x8F.ColorifyInto</c> on data spanning -300..470 and checks that
/// every value in the destination block lies in the range [0, 255].
/// Input and result are printed to the test output.
/// </summary>
[Fact]
public void ColorifyInto()
{
    float[] input = Create8x8ColorCropTestData();

    Block8x8F source = new Block8x8F();
    source.LoadFrom(input);

    Output.WriteLine("Input:");
    PrintLinearData(input);

    Block8x8F colorified = new Block8x8F();
    source.ColorifyInto(ref colorified);

    float[] values = new float[64];
    colorified.CopyTo(values);

    Output.WriteLine("Result:");
    PrintLinearData(values);

    for (int i = 0; i < values.Length; i++)
    {
        float val = values[i];
        Assert.InRange(val, 0, 255);
    }
}
}
}

54
tests/ImageSharp.Tests/Formats/Jpg/DctTests.cs

@ -0,0 +1,54 @@
using System.Numerics;
using ImageSharp.Formats;
using Xunit;
using Xunit.Abstractions;
namespace ImageSharp.Tests.Formats.Jpg
{
/// <summary>
/// Diagnostic tests that print SIMD capabilities and the 8x8 test data (plain and transposed)
/// to the test output. None of them asserts anything.
/// </summary>
public class DctTests : UtilityTestClassBase
{
    /// <summary>
    /// Initializes a new instance of the <see cref="DctTests"/> class.
    /// </summary>
    /// <param name="output">The xUnit output helper, forwarded to the base class.</param>
    public DctTests(ITestOutputHelper output)
        : base(output)
    {
    }

    /// <summary>
    /// Prints whether <see cref="Vector"/> is hardware accelerated and how many floats a
    /// <see cref="Vector{T}"/> holds.
    /// </summary>
    [Fact]
    public void Mennyi()
    {
        string accelerated = Vector.IsHardwareAccelerated.ToString();
        Output.WriteLine(accelerated);

        string floatLanes = Vector<float>.Count.ToString();
        Output.WriteLine(floatLanes);
    }

    /// <summary>
    /// Prints the shared 8x8 float test data.
    /// </summary>
    [Fact]
    public void CheckTestData()
    {
        float[] testData = Create8x8FloatData();
        Print8x8Data(testData);
    }

    /// <summary>
    /// Prints the result of the reference source-to-destination transpose.
    /// </summary>
    [Fact]
    public void Transpose8x8()
    {
        float[] source = Create8x8FloatData();
        MutableSpan<float> transposed = new MutableSpan<float>(64);

        ReferenceImplementations.Transpose8x8(source, transposed);

        Print8x8Data(transposed.Data);
    }

    /// <summary>
    /// Prints the result of the reference in-place transpose.
    /// </summary>
    [Fact]
    public void Transpose8x8_Inplace()
    {
        float[] block = Create8x8FloatData();

        ReferenceImplementations.Transpose8x8(block);

        Print8x8Data(block);
    }
}
}

364
tests/ImageSharp.Tests/Formats/Jpg/ReferenceImplementations.cs

@ -0,0 +1,364 @@
using System;
using System.Buffers;
using System.Numerics;
using System.Runtime.CompilerServices;
using ImageSharp.Formats;
// ReSharper disable InconsistentNaming
namespace ImageSharp.Tests.Formats.Jpg
{
/// <summary>
/// This class contains simplified (inefficient) reference implementations so we can verify actual ones in unit tests.
/// DCT code Ported from https://github.com/norishigefukushima/dct_simd
/// </summary>
public static class ReferenceImplementations
{
/// <summary>
/// Transposes an 8x8 row-major matrix in place by swapping each element below
/// the diagonal with its mirror element above the diagonal.
/// </summary>
/// <param name="data">The 64 values to transpose.</param>
internal static void Transpose8x8(MutableSpan<float> data)
{
    for (int row = 1; row < 8; row++)
    {
        for (int col = 0; col < row; col++)
        {
            int below = (row * 8) + col;
            int above = (col * 8) + row;

            float held = data[below];
            data[below] = data[above];
            data[above] = held;
        }
    }
}
/// <summary>
/// Writes the transpose of the 8x8 row-major matrix in <paramref name="src"/>
/// to <paramref name="dest"/>. Source elements are read in linear order.
/// </summary>
/// <param name="src">The 64 source values.</param>
/// <param name="dest">Receives the 64 transposed values.</param>
internal static void Transpose8x8(MutableSpan<float> src, MutableSpan<float> dest)
{
    for (int index = 0; index < 64; index++)
    {
        int row = index / 8;
        int col = index % 8;
        dest[(col * 8) + row] = src[index];
    }
}
/// <summary>
/// Scalar 8-point one-dimensional inverse DCT: reads <c>y[0..7]</c> and writes <c>x[0..7]</c>.
/// No output scaling is applied here; <c>iDCT2D_llm</c> multiplies by 0.125 after both passes.
/// </summary>
/// <param name="y">The eight input coefficients.</param>
/// <param name="x">Receives the eight output values.</param>
internal static void iDCT1Dllm_32f(MutableSpan<float> y, MutableSpan<float> x)
{
float a0, a1, a2, a3, b0, b1, b2, b3;
float z0, z1, z2, z3, z4;
// r[i] = cos(i / 16 * PI) * sqrt(2), per the formula quoted in iDCT2D8x4_32f.
// r0 and r4 are declared for completeness but not referenced below.
float r0 = 1.414214f;
float r1 = 1.387040f;
float r2 = 1.306563f;
float r3 = 1.175876f;
float r4 = 1.000000f;
float r5 = 0.785695f;
float r6 = 0.541196f;
float r7 = 0.275899f;
// Odd part: inputs 1, 3, 5, 7 produce b0..b3.
z0 = y[1] + y[7];
z1 = y[3] + y[5];
z2 = y[3] + y[7];
z3 = y[1] + y[5];
z4 = (z0 + z1)*r3;
z0 = z0*(-r3 + r7);
z1 = z1*(-r3 - r1);
z2 = z2*(-r3 - r5) + z4;
z3 = z3*(-r3 + r5) + z4;
b3 = y[7]*(-r1 + r3 + r5 - r7) + z0 + z2;
b2 = y[5]*(r1 + r3 - r5 + r7) + z1 + z3;
b1 = y[3]*(r1 + r3 + r5 - r7) + z1 + z2;
b0 = y[1]*(r1 + r3 - r5 - r7) + z0 + z3;
// Even part: inputs 0, 2, 4, 6 produce a0..a3.
z4 = (y[2] + y[6])*r6;
z0 = y[0] + y[4];
z1 = y[0] - y[4];
z2 = z4 - y[6]*(r2 + r6);
z3 = z4 + y[2]*(r2 - r6);
a0 = z0 + z3;
a3 = z0 - z3;
a1 = z1 + z2;
a2 = z1 - z2;
// Final butterflies: outputs k and 7 - k are a_k + b_k and a_k - b_k.
x[0] = a0 + b0;
x[7] = a0 - b0;
x[1] = a1 + b1;
x[6] = a1 - b1;
x[2] = a2 + b2;
x[5] = a2 - b2;
x[3] = a3 + b3;
x[4] = a3 - b3;
}
/// <summary>
/// Two-dimensional 8x8 inverse DCT built from the scalar 1-D routine: a 1-D pass over each
/// group of eight consecutive values, a transpose, a second 1-D pass, a second transpose,
/// then every result is multiplied by 0.125.
/// </summary>
/// <param name="s">The 64 input coefficients.</param>
/// <param name="d">Receives the 64 output values; also used as intermediate storage.</param>
/// <param name="temp">Scratch space for 64 values.</param>
internal static void iDCT2D_llm(MutableSpan<float> s, MutableSpan<float> d, MutableSpan<float> temp)
{
    // First pass: s -> temp, then transpose temp -> d.
    for (int start = 0; start < 64; start += 8)
    {
        iDCT1Dllm_32f(s.Slice(start), temp.Slice(start));
    }

    Transpose8x8(temp, d);

    // Second pass: d -> temp, then transpose temp -> d.
    for (int start = 0; start < 64; start += 8)
    {
        iDCT1Dllm_32f(d.Slice(start), temp.Slice(start));
    }

    Transpose8x8(temp, d);

    for (int index = 0; index < 64; index++)
    {
        d[index] = d[index] * 0.125f;
    }
}
/// <summary>
/// Reads four consecutive floats starting <paramref name="offset"/> elements into
/// <paramref name="src"/> and packs them into a <see cref="Vector4"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector4 _mm_load_ps(MutableSpan<float> src, int offset)
{
    MutableSpan<float> window = src.Slice(offset);
    float x = window[0];
    float y = window[1];
    float z = window[2];
    float w = window[3];
    return new Vector4(x, y, z, w);
}
/// <summary>
/// Packs the first four floats of <paramref name="src"/> into a <see cref="Vector4"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector4 _mm_load_ps(MutableSpan<float> src)
{
    float x = src[0];
    float y = src[1];
    float z = src[2];
    float w = src[3];
    return new Vector4(x, y, z, w);
}
/// <summary>
/// Writes the X, Y, Z, W components of <paramref name="src"/> to four consecutive floats
/// starting <paramref name="offset"/> elements into <paramref name="dest"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void _mm_store_ps(MutableSpan<float> dest, int offset, Vector4 src)
{
    MutableSpan<float> window = dest.Slice(offset);
    window[0] = src.X;
    window[1] = src.Y;
    window[2] = src.Z;
    window[3] = src.W;
}
/// <summary>
/// Writes the X, Y, Z, W components of <paramref name="src"/> to the first four floats
/// of <paramref name="dest"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void _mm_store_ps(MutableSpan<float> dest, Vector4 src)
{
    float x = src.X;
    float y = src.Y;
    float z = src.Z;
    float w = src.W;

    dest[0] = x;
    dest[1] = y;
    dest[2] = z;
    dest[3] = w;
}
// Broadcast constants used by iDCT2D8x4_32f; each Vector4 holds the same value in all four lanes.
// NOTE: the field names encode only the magnitude. _1_961571, _0_390181, _0_899976,
// _2_562915 and _1_847759 hold NEGATIVE values.
private static readonly Vector4 _1_175876 = new Vector4(1.175876f);
private static readonly Vector4 _1_961571 = new Vector4(-1.961571f);
private static readonly Vector4 _0_390181 = new Vector4(-0.390181f);
private static readonly Vector4 _0_899976 = new Vector4(-0.899976f);
private static readonly Vector4 _2_562915 = new Vector4(-2.562915f);
private static readonly Vector4 _0_298631 = new Vector4(0.298631f);
private static readonly Vector4 _2_053120 = new Vector4(2.053120f);
private static readonly Vector4 _3_072711 = new Vector4(3.072711f);
private static readonly Vector4 _1_501321 = new Vector4(1.501321f);
private static readonly Vector4 _0_541196 = new Vector4(0.541196f);
private static readonly Vector4 _1_847759 = new Vector4(-1.847759f);
private static readonly Vector4 _0_765367 = new Vector4(0.765367f);
/// <summary>
/// Vector4 version of the 8-point inverse DCT butterflies, processing four lanes at once.
/// Reads four consecutive floats at offsets 0, 8, 16, ..., 56 of <paramref name="y"/> and
/// writes four floats at the same offsets of <paramref name="x"/>. Callers handle the other
/// four lanes by passing both spans sliced at offset 4.
/// The commented-out blocks quote the scalar code that each vector statement group mirrors.
/// No output scaling is applied here.
/// </summary>
/// <param name="y">The input coefficients.</param>
/// <param name="x">Receives the output values.</param>
internal static void iDCT2D8x4_32f(MutableSpan<float> y, MutableSpan<float> x)
{
/*
float a0,a1,a2,a3,b0,b1,b2,b3; float z0,z1,z2,z3,z4; float r[8]; int i;
for(i = 0;i < 8;i++){ r[i] = (float)(cos((double)i / 16.0 * M_PI) * M_SQRT2); }
*/
/*
0: 1.414214
1: 1.387040
2: 1.306563
3: 1.175876
4: 1.000000
5: 0.785695
6: 0.541196
7: 0.275899
*/
// Odd part: inputs 1, 3, 5, 7 (offsets 8, 24, 40, 56).
Vector4 my1 = _mm_load_ps(y, 8);
Vector4 my7 = _mm_load_ps(y, 56);
Vector4 mz0 = my1 + my7;
Vector4 my3 = _mm_load_ps(y, 24);
Vector4 mz2 = my3 + my7;
Vector4 my5 = _mm_load_ps(y, 40);
Vector4 mz1 = my3 + my5;
Vector4 mz3 = my1 + my5;
Vector4 mz4 = ((mz0 + mz1)* _1_175876);
//z0 = y[1] + y[7]; z1 = y[3] + y[5]; z2 = y[3] + y[7]; z3 = y[1] + y[5];
//z4 = (z0 + z1) * r[3];
// The named constants below already carry their (negative) sign.
mz2 = mz2* _1_961571 + mz4;
mz3 = mz3* _0_390181 + mz4;
mz0 = mz0* _0_899976;
mz1 = mz1* _2_562915;
/*
-0.899976
-2.562915
-1.961571
-0.390181
z0 = z0 * (-r[3] + r[7]);
z1 = z1 * (-r[3] - r[1]);
z2 = z2 * (-r[3] - r[5]) + z4;
z3 = z3 * (-r[3] + r[5]) + z4;*/
Vector4 mb3 = my7* _0_298631 + mz0 + mz2;
Vector4 mb2 = my5* _2_053120 + mz1 + mz3;
Vector4 mb1 = my3* _3_072711 + mz1 + mz2;
Vector4 mb0 = my1* _1_501321 + mz0 + mz3;
/*
0.298631
2.053120
3.072711
1.501321
b3 = y[7] * (-r[1] + r[3] + r[5] - r[7]) + z0 + z2;
b2 = y[5] * ( r[1] + r[3] - r[5] + r[7]) + z1 + z3;
b1 = y[3] * ( r[1] + r[3] + r[5] - r[7]) + z1 + z2;
b0 = y[1] * ( r[1] + r[3] - r[5] - r[7]) + z0 + z3;
*/
// Even part: inputs 0, 2, 4, 6 (offsets 0, 16, 32, 48). my0..my3 are reused to hold a0..a3.
Vector4 my2 = _mm_load_ps(y, 16);
Vector4 my6 = _mm_load_ps(y, 48);
mz4 = (my2 + my6)* _0_541196;
Vector4 my0 = _mm_load_ps(y, 0);
Vector4 my4 = _mm_load_ps(y, 32);
mz0 = my0 + my4;
mz1 = my0 - my4;
mz2 = mz4 + my6* _1_847759;
mz3 = mz4 + my2* _0_765367;
my0 = mz0 + mz3;
my3 = mz0 - mz3;
my1 = mz1 + mz2;
my2 = mz1 - mz2;
/*
1.847759
0.765367
z4 = (y[2] + y[6]) * r[6];
z0 = y[0] + y[4]; z1 = y[0] - y[4];
z2 = z4 - y[6] * (r[2] + r[6]);
z3 = z4 + y[2] * (r[2] - r[6]);
a0 = z0 + z3; a3 = z0 - z3;
a1 = z1 + z2; a2 = z1 - z2;
*/
// Final butterflies: outputs k and 7 - k are a_k + b_k and a_k - b_k.
_mm_store_ps(x, 0, my0 + mb0);
_mm_store_ps(x, 56, my0 - mb0);
_mm_store_ps(x, 8, my1 + mb1);
_mm_store_ps(x, 48, my1 - mb1);
_mm_store_ps(x, 16, my2 + mb2);
_mm_store_ps(x, 40, my2 - mb2);
_mm_store_ps(x, 24, my3 + mb3);
_mm_store_ps(x, 32, my3 - mb3);
/*
x[0] = a0 + b0; x[7] = a0 - b0;
x[1] = a1 + b1; x[6] = a1 - b1;
x[2] = a2 + b2; x[5] = a2 - b2;
x[3] = a3 + b3; x[4] = a3 - b3;
for(i = 0;i < 8;i++){ x[i] *= 0.353554f; }
*/
}
/// <summary>
/// 8x8 inverse DCT assembled from the Vector4 routine: transpose, two 4-lane passes
/// (offsets 0 and 4), transpose, two more 4-lane passes, then all 64 results are
/// multiplied by 0.125 four floats at a time.
/// </summary>
/// <param name="s">The 64 input coefficients.</param>
/// <param name="d">Receives the 64 output values.</param>
/// <param name="temp">Scratch space for 64 values.</param>
internal static void iDCT8x8_llm_sse(MutableSpan<float> s, MutableSpan<float> d, MutableSpan<float> temp)
{
    Transpose8x8(s, temp);
    iDCT2D8x4_32f(temp, d);
    iDCT2D8x4_32f(temp.Slice(4), d.Slice(4));

    Transpose8x8(d, temp);
    iDCT2D8x4_32f(temp, d);
    iDCT2D8x4_32f(temp.Slice(4), d.Slice(4));

    Vector4 scale = new Vector4(0.1250f);

    // 16 groups of 4 floats cover the whole block; d is advanced after each group.
    for (int group = 0; group < 16; group++)
    {
        _mm_store_ps(d, _mm_load_ps(d) * scale);
        d.AddOffset(4);
    }
}
/// <summary>
/// Reference conversion of a block to bytes: values below -128 become 0, values above 127
/// become 255, everything else is shifted by +128, and the result is cast to <c>byte</c>.
/// Element (x, y) of the block is written to <c>buffer[y * stride + x]</c>.
/// </summary>
/// <param name="block">The block to read, treated as 64 consecutive floats.</param>
/// <param name="buffer">The destination bytes.</param>
/// <param name="stride">The distance in bytes between consecutive destination rows.</param>
internal static unsafe void CopyColorsTo(ref Block8x8F block, MutableSpan<byte> buffer, int stride)
{
    fixed (Block8x8F* blockPtr = &block)
    {
        float* scalars = (float*)blockPtr;

        for (int index = 0; index < 64; index++)
        {
            int y = index / 8;
            int x = index % 8;

            float c = scalars[index];
            float shifted = c < -128 ? 0f : (c > 127 ? 255f : c + 128);

            buffer[(y * stride) + x] = (byte)shifted;
        }
    }
}
}
}

95
tests/ImageSharp.Tests/Formats/Jpg/UtilityTestClassBase.cs

@ -0,0 +1,95 @@
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Text;
using ImageSharp.Formats;
using Xunit.Abstractions;
namespace ImageSharp.Tests.Formats.Jpg
{
public class UtilityTestClassBase
{
/// <summary>
/// Initializes a new instance of the <see cref="UtilityTestClassBase"/> class.
/// </summary>
/// <param name="output">The xUnit output helper that the print and measure helpers write to.</param>
public UtilityTestClassBase(ITestOutputHelper output)
{
    this.Output = output;
}
/// <summary>
/// Gets the xUnit output helper that was passed to the constructor.
/// </summary>
protected ITestOutputHelper Output { get; }
/// <summary>
/// Builds 64 test values in row-major 8x8 order where element (row, column) is
/// <c>row * 10 + column</c>.
/// </summary>
/// <returns>A new 64-element array.</returns>
// ReSharper disable once InconsistentNaming
public static float[] Create8x8FloatData()
{
    float[] block = new float[64];

    for (int index = 0; index < block.Length; index++)
    {
        int row = index / 8;
        int column = index % 8;
        block[index] = (row * 10) + column;
    }

    return block;
}
/// <summary>
/// Integer counterpart of the float test data: 64 values in row-major 8x8 order where
/// element (row, column) is <c>row * 10 + column</c>.
/// </summary>
/// <returns>A new 64-element array.</returns>
// ReSharper disable once InconsistentNaming
public static int[] Create8x8IntData()
{
    int[] block = new int[64];

    for (int index = 0; index < block.Length; index++)
    {
        int row = index / 8;
        int column = index % 8;
        block[index] = (row * 10) + column;
    }

    return block;
}
/// <summary>
/// Prints the array behind <paramref name="data"/> as an 8x8 grid.
/// </summary>
internal void Print8x8Data<T>(MutableSpan<T> data)
{
    T[] backingArray = data.Data;
    Print8x8Data(backingArray);
}
/// <summary>
/// Writes the first 64 elements of <paramref name="data"/> to the test output as eight
/// lines of eight values, each value right-aligned to a width of 3 and followed by a space.
/// </summary>
internal void Print8x8Data<T>(T[] data)
{
    StringBuilder grid = new StringBuilder();

    for (int index = 0; index < 64; index++)
    {
        grid.Append($"{data[index],3} ");

        // End the line after every eighth value.
        if (index % 8 == 7)
        {
            grid.AppendLine();
        }
    }

    Output.WriteLine(grid.ToString());
}
/// <summary>
/// Prints every element of <paramref name="data"/> on a single line.
/// </summary>
internal void PrintLinearData<T>(T[] data)
{
    MutableSpan<T> span = new MutableSpan<T>(data);
    PrintLinearData(span, data.Length);
}
/// <summary>
/// Writes the first <paramref name="count"/> elements of <paramref name="data"/> to the test
/// output on a single line, each right-aligned to a width of 3 and followed by a space.
/// </summary>
/// <param name="data">The values to print.</param>
/// <param name="count">How many values to print; a negative value means <c>data.TotalCount</c>.</param>
internal void PrintLinearData<T>(MutableSpan<T> data, int count = -1)
{
    int limit = count < 0 ? data.TotalCount : count;

    StringBuilder line = new StringBuilder();
    int index = 0;
    while (index < limit)
    {
        line.Append($"{data[index],3} ");
        index++;
    }

    Output.WriteLine(line.ToString());
}
/// <summary>
/// Runs <paramref name="action"/> <paramref name="times"/> times and writes a start line and
/// the total elapsed milliseconds to the test output.
/// </summary>
/// <param name="times">How many times to invoke the action.</param>
/// <param name="action">The code to run.</param>
/// <param name="operationName">Label for the output; defaults to the calling member's name.</param>
protected void Measure(int times, Action action, [CallerMemberName] string operationName = null)
{
    Output.WriteLine($"{operationName} X {times} ...");

    Stopwatch timer = Stopwatch.StartNew();
    int completed = 0;
    while (completed < times)
    {
        action();
        completed++;
    }

    timer.Stop();

    Output.WriteLine($"{operationName} finished in {timer.ElapsedMilliseconds} ms");
}
}
}
Loading…
Cancel
Save