|
|
|
@ -168,23 +168,23 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils |
|
|
|
/// <param name="d">Destination</param>
|
|
|
|
public static void FDCT2D8x4_32f(Span<float> s, Span<float> d) |
|
|
|
{ |
|
|
|
Vector4 c0 = Mm_load_ps(s, 0); |
|
|
|
Vector4 c1 = Mm_load_ps(s, 56); |
|
|
|
Vector4 c0 = _mm_load_ps(s, 0); |
|
|
|
Vector4 c1 = _mm_load_ps(s, 56); |
|
|
|
Vector4 t0 = c0 + c1; |
|
|
|
Vector4 t7 = c0 - c1; |
|
|
|
|
|
|
|
c1 = Mm_load_ps(s, 48); |
|
|
|
c0 = Mm_load_ps(s, 8); |
|
|
|
c1 = _mm_load_ps(s, 48); |
|
|
|
c0 = _mm_load_ps(s, 8); |
|
|
|
Vector4 t1 = c0 + c1; |
|
|
|
Vector4 t6 = c0 - c1; |
|
|
|
|
|
|
|
c1 = Mm_load_ps(s, 40); |
|
|
|
c0 = Mm_load_ps(s, 16); |
|
|
|
c1 = _mm_load_ps(s, 40); |
|
|
|
c0 = _mm_load_ps(s, 16); |
|
|
|
Vector4 t2 = c0 + c1; |
|
|
|
Vector4 t5 = c0 - c1; |
|
|
|
|
|
|
|
c0 = Mm_load_ps(s, 24); |
|
|
|
c1 = Mm_load_ps(s, 32); |
|
|
|
c0 = _mm_load_ps(s, 24); |
|
|
|
c1 = _mm_load_ps(s, 32); |
|
|
|
Vector4 t3 = c0 + c1; |
|
|
|
Vector4 t4 = c0 - c1; |
|
|
|
|
|
|
|
@ -205,9 +205,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils |
|
|
|
c1 = t1 + t2; c2 = t1 - t2; |
|
|
|
*/ |
|
|
|
|
|
|
|
Mm_store_ps(d, 0, c0 + c1); |
|
|
|
_mm_store_ps(d, 0, c0 + c1); |
|
|
|
|
|
|
|
Mm_store_ps(d, 32, c0 - c1); |
|
|
|
_mm_store_ps(d, 32, c0 - c1); |
|
|
|
|
|
|
|
/*y[0] = c0 + c1; |
|
|
|
y[4] = c0 - c1;*/ |
|
|
|
@ -215,9 +215,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils |
|
|
|
var w0 = new Vector4(0.541196f); |
|
|
|
var w1 = new Vector4(1.306563f); |
|
|
|
|
|
|
|
Mm_store_ps(d, 16, (w0 * c2) + (w1 * c3)); |
|
|
|
_mm_store_ps(d, 16, (w0 * c2) + (w1 * c3)); |
|
|
|
|
|
|
|
Mm_store_ps(d, 48, (w0 * c3) - (w1 * c2)); |
|
|
|
_mm_store_ps(d, 48, (w0 * c3) - (w1 * c2)); |
|
|
|
/* |
|
|
|
y[2] = c2 * r[6] + c3 * r[2]; |
|
|
|
y[6] = c3 * r[6] - c2 * r[2]; |
|
|
|
@ -241,23 +241,22 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils |
|
|
|
c1 = t6 * r[1] - t5 * r[7]; |
|
|
|
*/ |
|
|
|
|
|
|
|
Mm_store_ps(d, 24, c0 - c2); |
|
|
|
_mm_store_ps(d, 24, c0 - c2); |
|
|
|
|
|
|
|
Mm_store_ps(d, 40, c3 - c1); |
|
|
|
_mm_store_ps(d, 40, c3 - c1); |
|
|
|
|
|
|
|
// y[5] = c3 - c1; y[3] = c0 - c2;
|
|
|
|
var invsqrt2 = new Vector4(0.707107f); |
|
|
|
c0 = (c0 + c2) * invsqrt2; |
|
|
|
c3 = (c3 + c1) * invsqrt2; |
|
|
|
/* c0 = (c0 + c2) * invsqrt2; |
|
|
|
c3 = (c3 + c1) * invsqrt2; */ |
|
|
|
|
|
|
|
Mm_store_ps(d, 8, c0 + c3); |
|
|
|
// c0 = (c0 + c2) * invsqrt2;
|
|
|
|
// c3 = (c3 + c1) * invsqrt2;
|
|
|
|
_mm_store_ps(d, 8, c0 + c3); |
|
|
|
_mm_store_ps(d, 56, c0 - c3); |
|
|
|
|
|
|
|
Mm_store_ps(d, 56, c0 - c3); |
|
|
|
/* y[1] = c0 + c3; y[7] = c0 - c3; |
|
|
|
|
|
|
|
for(i = 0;i < 8;i++) |
|
|
|
// y[1] = c0 + c3; y[7] = c0 - c3;
|
|
|
|
/*for(i = 0;i < 8;i++) |
|
|
|
{ |
|
|
|
y[i] *= invsqrt2h; |
|
|
|
}*/ |
|
|
|
@ -279,49 +278,39 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils |
|
|
|
|
|
|
|
var c = new Vector4(0.1250f); |
|
|
|
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 0
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 1
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 2
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 3
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 4
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 5
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 6
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 7
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 8
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 9
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 10
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 11
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 12
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 13
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 14
|
|
|
|
Mm_store_ps(d, 0, Mm_load_ps(d, 0) * c); |
|
|
|
d = d.Slice(4); // 15
|
|
|
|
#pragma warning disable SA1107 // Code should not contain multiple statements on one line
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 0
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 1
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 2
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 3
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 4
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 5
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 6
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 7
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 8
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 9
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 10
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 11
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 12
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 13
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 14
|
|
|
|
_mm_store_ps(d, 0, _mm_load_ps(d, 0) * c); d = d.Slice(4); // 15
|
|
|
|
#pragma warning restore SA1107 // Code should not contain multiple statements on one line
|
|
|
|
} |
|
|
|
|
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|
|
|
private static Vector4 Mm_load_ps(Span<float> src, int offset) |
|
|
|
#pragma warning disable SA1300 // Element should begin with upper-case letter
|
|
|
|
private static Vector4 _mm_load_ps(Span<float> src, int offset) |
|
|
|
#pragma warning restore SA1300 // Element should begin with upper-case letter
|
|
|
|
{ |
|
|
|
src = src.Slice(offset); |
|
|
|
return new Vector4(src[0], src[1], src[2], src[3]); |
|
|
|
} |
|
|
|
|
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|
|
|
private static void Mm_store_ps(Span<float> dest, int offset, Vector4 src) |
|
|
|
#pragma warning disable SA1300 // Element should begin with upper-case letter
|
|
|
|
private static void _mm_store_ps(Span<float> dest, int offset, Vector4 src) |
|
|
|
#pragma warning restore SA1300 // Element should begin with upper-case letter
|
|
|
|
{ |
|
|
|
dest = dest.Slice(offset); |
|
|
|
dest[0] = src.X; |
|
|
|
@ -369,7 +358,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils |
|
|
|
float a0,a1,a2,a3,b0,b1,b2,b3; float z0,z1,z2,z3,z4; float r[8]; int i; |
|
|
|
for(i = 0;i < 8;i++){ r[i] = (float)(cos((double)i / 16.0 * M_PI) * M_SQRT2); } |
|
|
|
*/ |
|
|
|
|
|
|
|
/* |
|
|
|
0: 1.414214 |
|
|
|
1: 1.387040 |
|
|
|
@ -381,20 +369,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils |
|
|
|
7: 0.275899 |
|
|
|
*/ |
|
|
|
|
|
|
|
Vector4 my1 = Mm_load_ps(y, 8); |
|
|
|
Vector4 my7 = Mm_load_ps(y, 56); |
|
|
|
Vector4 my1 = _mm_load_ps(y, 8); |
|
|
|
Vector4 my7 = _mm_load_ps(y, 56); |
|
|
|
Vector4 mz0 = my1 + my7; |
|
|
|
|
|
|
|
Vector4 my3 = Mm_load_ps(y, 24); |
|
|
|
Vector4 my3 = _mm_load_ps(y, 24); |
|
|
|
Vector4 mz2 = my3 + my7; |
|
|
|
Vector4 my5 = Mm_load_ps(y, 40); |
|
|
|
Vector4 my5 = _mm_load_ps(y, 40); |
|
|
|
Vector4 mz1 = my3 + my5; |
|
|
|
Vector4 mz3 = my1 + my5; |
|
|
|
|
|
|
|
Vector4 mz4 = (mz0 + mz1) * _1_175876; |
|
|
|
/* z0 = y[1] + y[7]; z1 = y[3] + y[5]; z2 = y[3] + y[7]; z3 = y[1] + y[5]; |
|
|
|
z4 = (z0 + z1) * r[3];*/ |
|
|
|
|
|
|
|
// z0 = y[1] + y[7]; z1 = y[3] + y[5]; z2 = y[3] + y[7]; z3 = y[1] + y[5];
|
|
|
|
// z4 = (z0 + z1) * r[3];
|
|
|
|
mz2 = (mz2 * _1_961571) + mz4; |
|
|
|
mz3 = (mz3 * _0_390181) + mz4; |
|
|
|
mz0 = mz0 * _0_899976; |
|
|
|
@ -426,11 +414,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils |
|
|
|
b0 = y[1] * ( r[1] + r[3] - r[5] - r[7]) + z0 + z3; |
|
|
|
*/ |
|
|
|
|
|
|
|
Vector4 my2 = Mm_load_ps(y, 16); |
|
|
|
Vector4 my6 = Mm_load_ps(y, 48); |
|
|
|
Vector4 my2 = _mm_load_ps(y, 16); |
|
|
|
Vector4 my6 = _mm_load_ps(y, 48); |
|
|
|
mz4 = (my2 + my6) * _0_541196; |
|
|
|
Vector4 my0 = Mm_load_ps(y, 0); |
|
|
|
Vector4 my4 = Mm_load_ps(y, 32); |
|
|
|
Vector4 my0 = _mm_load_ps(y, 0); |
|
|
|
Vector4 my4 = _mm_load_ps(y, 32); |
|
|
|
mz0 = my0 + my4; |
|
|
|
mz1 = my0 - my4; |
|
|
|
|
|
|
|
@ -441,7 +429,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils |
|
|
|
my3 = mz0 - mz3; |
|
|
|
my1 = mz1 + mz2; |
|
|
|
my2 = mz1 - mz2; |
|
|
|
|
|
|
|
/* |
|
|
|
1.847759 |
|
|
|
0.765367 |
|
|
|
@ -453,22 +440,21 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils |
|
|
|
a1 = z1 + z2; a2 = z1 - z2; |
|
|
|
*/ |
|
|
|
|
|
|
|
Mm_store_ps(x, 0, my0 + mb0); |
|
|
|
|
|
|
|
Mm_store_ps(x, 56, my0 - mb0); |
|
|
|
_mm_store_ps(x, 0, my0 + mb0); |
|
|
|
|
|
|
|
Mm_store_ps(x, 8, my1 + mb1); |
|
|
|
_mm_store_ps(x, 56, my0 - mb0); |
|
|
|
|
|
|
|
Mm_store_ps(x, 48, my1 - mb1); |
|
|
|
_mm_store_ps(x, 8, my1 + mb1); |
|
|
|
|
|
|
|
Mm_store_ps(x, 16, my2 + mb2); |
|
|
|
_mm_store_ps(x, 48, my1 - mb1); |
|
|
|
|
|
|
|
Mm_store_ps(x, 40, my2 - mb2); |
|
|
|
_mm_store_ps(x, 16, my2 + mb2); |
|
|
|
|
|
|
|
Mm_store_ps(x, 24, my3 + mb3); |
|
|
|
_mm_store_ps(x, 40, my2 - mb2); |
|
|
|
|
|
|
|
Mm_store_ps(x, 32, my3 - mb3); |
|
|
|
_mm_store_ps(x, 24, my3 + mb3); |
|
|
|
|
|
|
|
_mm_store_ps(x, 32, my3 - mb3); |
|
|
|
/* |
|
|
|
x[0] = a0 + b0; x[7] = a0 - b0; |
|
|
|
x[1] = a1 + b1; x[6] = a1 - b1; |
|
|
|
@ -496,7 +482,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils |
|
|
|
|
|
|
|
const float invsqrt2 = 0.707107f; // (float)(1.0f / M_SQRT2);
|
|
|
|
|
|
|
|
// const float invsqrt2h = 0.353554f; // invsqrt2*0.5f;
|
|
|
|
// const float invsqrt2h = 0.353554f; //invsqrt2*0.5f;
|
|
|
|
c1 = x[0]; |
|
|
|
c2 = x[7]; |
|
|
|
t0 = c1 + c2; |
|
|
|
|