|
|
|
@ -456,9 +456,9 @@ namespace MathNet.Numerics.Algorithms.LinearAlgebra |
|
|
|
/// <param name="c">The c matrix.</param>
|
|
|
|
public virtual void MatrixMultiplyWithUpdate(Transpose transposeA, Transpose transposeB, Complex32 alpha, Complex32[] a, int rowsA, int columnsA, Complex32[] b, int rowsB, int columnsB, Complex32 beta, Complex32[] c) |
|
|
|
{ |
|
|
|
// Choose nonsensical values for the number of rows in c; fill them in depending
|
|
|
|
// on the operations on a and b.
|
|
|
|
int rowsC; |
|
|
|
int m; // The number of rows of matrix op(A) and of the matrix C.
|
|
|
|
int n; // The number of columns of matrix op(B) and of the matrix C.
|
|
|
|
int k; // The number of columns of matrix op(A) and the rows of the matrix op(B).
|
|
|
|
|
|
|
|
// First check some basic requirement on the parameters of the matrix multiplication.
|
|
|
|
if (a == null) |
|
|
|
@ -483,7 +483,9 @@ namespace MathNet.Numerics.Algorithms.LinearAlgebra |
|
|
|
throw new ArgumentOutOfRangeException(); |
|
|
|
} |
|
|
|
|
|
|
|
rowsC = columnsA; |
|
|
|
m = columnsA; |
|
|
|
n = rowsB; |
|
|
|
k = rowsA; |
|
|
|
} |
|
|
|
else if ((int)transposeA > 111) |
|
|
|
{ |
|
|
|
@ -497,7 +499,9 @@ namespace MathNet.Numerics.Algorithms.LinearAlgebra |
|
|
|
throw new ArgumentOutOfRangeException(); |
|
|
|
} |
|
|
|
|
|
|
|
rowsC = columnsA; |
|
|
|
m = columnsA; |
|
|
|
n = columnsB; |
|
|
|
k = rowsA; |
|
|
|
} |
|
|
|
else if ((int)transposeB > 111) |
|
|
|
{ |
|
|
|
@ -511,7 +515,9 @@ namespace MathNet.Numerics.Algorithms.LinearAlgebra |
|
|
|
throw new ArgumentOutOfRangeException(); |
|
|
|
} |
|
|
|
|
|
|
|
rowsC = rowsA; |
|
|
|
m = rowsA; |
|
|
|
n = rowsB; |
|
|
|
k = columnsA; |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
@ -525,7 +531,9 @@ namespace MathNet.Numerics.Algorithms.LinearAlgebra |
|
|
|
throw new ArgumentOutOfRangeException(); |
|
|
|
} |
|
|
|
|
|
|
|
rowsC = rowsA; |
|
|
|
m = rowsA; |
|
|
|
n = columnsB; |
|
|
|
k = columnsA; |
|
|
|
} |
|
|
|
|
|
|
|
if (alpha.IsZero() && beta.IsZero()) |
|
|
|
@ -557,268 +565,271 @@ namespace MathNet.Numerics.Algorithms.LinearAlgebra |
|
|
|
bdata = b; |
|
|
|
} |
|
|
|
|
|
|
|
if (alpha.IsOne()) |
|
|
|
if (beta.IsZero()) |
|
|
|
{ |
|
|
|
if (beta.IsZero()) |
|
|
|
Array.Clear(c, 0, c.Length); |
|
|
|
} |
|
|
|
else if (!beta.IsOne()) |
|
|
|
{ |
|
|
|
Control.LinearAlgebraProvider.ScaleArray(beta, c, c); |
|
|
|
} |
|
|
|
|
|
|
|
if (alpha.IsZero()) |
|
|
|
{ |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, adata, 0, 0, bdata, 0, 0, c, 0, 0, m, n, k, m, n, k, true); |
|
|
|
} |
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Cache-Oblivious Matrix Multiplication
|
|
|
|
/// </summary>
|
|
|
|
/// <param name="transposeA">if set to <c>true</c> transpose matrix A.</param>
|
|
|
|
/// <param name="transposeB">if set to <c>true</c> transpose matrix B.</param>
|
|
|
|
/// <param name="alpha">The value to scale the matrix A with.</param>
|
|
|
|
/// <param name="matrixA">The matrix A.</param>
|
|
|
|
/// <param name="shiftArow">Row-shift of the left matrix</param>
|
|
|
|
/// <param name="shiftAcol">Column-shift of the left matrix</param>
|
|
|
|
/// <param name="matrixB">The matrix B.</param>
|
|
|
|
/// <param name="shiftBrow">Row-shift of the right matrix</param>
|
|
|
|
/// <param name="shiftBcol">Column-shift of the right matrix</param>
|
|
|
|
/// <param name="result">The matrix C.</param>
|
|
|
|
/// <param name="shiftCrow">Row-shift of the result matrix</param>
|
|
|
|
/// <param name="shiftCcol">Column-shift of the result matrix</param>
|
|
|
|
/// <param name="m">The number of rows of matrix op(A) and of the matrix C.</param>
|
|
|
|
/// <param name="n">The number of columns of matrix op(B) and of the matrix C.</param>
|
|
|
|
/// <param name="k">The number of columns of matrix op(A) and the rows of the matrix op(B).</param>
|
|
|
|
/// <param name="constM">The constant number of rows of matrix op(A) and of the matrix C.</param>
|
|
|
|
/// <param name="constN">The constant number of columns of matrix op(B) and of the matrix C.</param>
|
|
|
|
/// <param name="constK">The constant number of columns of matrix op(A) and the rows of the matrix op(B).</param>
|
|
|
|
/// <param name="first">Indicates if this is the first recursion.</param>
|
|
|
|
private static void CacheObliviousMatrixMultiply(Transpose transposeA, Transpose transposeB, Complex32 alpha, Complex32[] matrixA, int shiftArow, int shiftAcol, Complex32[] matrixB, int shiftBrow, int shiftBcol, Complex32[] result, int shiftCrow, int shiftCcol, int m, int n, int k, int constM, int constN, int constK, bool first) |
|
|
|
{ |
|
|
|
if (m + n + k <= Control.ParallelizeOrder) |
|
|
|
{ |
|
|
|
if ((int)transposeA > 111 && (int)transposeB > 111) |
|
|
|
{ |
|
|
|
if ((int)transposeA > 111 && (int)transposeB > 111) |
|
|
|
if ((int)transposeA > 112 && (int)transposeB > 112) |
|
|
|
{ |
|
|
|
CommonParallel.For( |
|
|
|
0, |
|
|
|
columnsA, |
|
|
|
j => |
|
|
|
for (var m1 = 0; m1 < m; m1++) |
|
|
|
{ |
|
|
|
var matArowPos = m1 + shiftArow; |
|
|
|
var matCrowPos = m1 + shiftCrow; |
|
|
|
for (var n1 = 0; n1 < n; ++n1) |
|
|
|
{ |
|
|
|
var jIndex = j * rowsC; |
|
|
|
for (var i = 0; i != rowsB; i++) |
|
|
|
var matBcolPos = n1 + shiftBcol; |
|
|
|
var sum = Complex32.Zero; |
|
|
|
for (var k1 = 0; k1 < k; ++k1) |
|
|
|
{ |
|
|
|
var iIndex = i * rowsA; |
|
|
|
Complex32 s = 0; |
|
|
|
for (var l = 0; l != columnsB; l++) |
|
|
|
{ |
|
|
|
s += adata[iIndex + l] * bdata[(l * rowsB) + j]; |
|
|
|
} |
|
|
|
|
|
|
|
c[jIndex + i] = s; |
|
|
|
sum += matrixA[(matArowPos * constK) + k1 + shiftAcol].Conjugate() * |
|
|
|
matrixB[((k1 + shiftBrow) * constN) + matBcolPos].Conjugate(); |
|
|
|
} |
|
|
|
}); |
|
|
|
|
|
|
|
result[((n1 + shiftCcol) * constM) + matCrowPos] += alpha * sum; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
else if ((int)transposeA > 111) |
|
|
|
else if ((int)transposeA > 112) |
|
|
|
{ |
|
|
|
CommonParallel.For( |
|
|
|
0, |
|
|
|
columnsB, |
|
|
|
j => |
|
|
|
for (var m1 = 0; m1 < m; m1++) |
|
|
|
{ |
|
|
|
var matArowPos = m1 + shiftArow; |
|
|
|
var matCrowPos = m1 + shiftCrow; |
|
|
|
for (var n1 = 0; n1 < n; ++n1) |
|
|
|
{ |
|
|
|
var jcIndex = j * rowsC; |
|
|
|
var jbIndex = j * rowsB; |
|
|
|
for (var i = 0; i != columnsA; i++) |
|
|
|
var matBcolPos = n1 + shiftBcol; |
|
|
|
var sum = Complex32.Zero; |
|
|
|
for (var k1 = 0; k1 < k; ++k1) |
|
|
|
{ |
|
|
|
var iIndex = i * rowsA; |
|
|
|
Complex32 s = 0; |
|
|
|
for (var l = 0; l != rowsA; l++) |
|
|
|
{ |
|
|
|
s += adata[iIndex + l] * bdata[jbIndex + l]; |
|
|
|
} |
|
|
|
|
|
|
|
c[jcIndex + i] = s; |
|
|
|
sum += matrixA[(matArowPos * constK) + k1 + shiftAcol].Conjugate() * |
|
|
|
matrixB[((k1 + shiftBrow) * constN) + matBcolPos]; |
|
|
|
} |
|
|
|
}); |
|
|
|
|
|
|
|
result[((n1 + shiftCcol) * constM) + matCrowPos] += alpha * sum; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
else if ((int)transposeB > 111) |
|
|
|
else if ((int)transposeB > 112) |
|
|
|
{ |
|
|
|
CommonParallel.For( |
|
|
|
0, |
|
|
|
rowsB, |
|
|
|
j => |
|
|
|
for (var m1 = 0; m1 < m; m1++) |
|
|
|
{ |
|
|
|
var matArowPos = m1 + shiftArow; |
|
|
|
var matCrowPos = m1 + shiftCrow; |
|
|
|
for (var n1 = 0; n1 < n; ++n1) |
|
|
|
{ |
|
|
|
var jIndex = j * rowsC; |
|
|
|
for (var i = 0; i != rowsA; i++) |
|
|
|
var matBcolPos = n1 + shiftBcol; |
|
|
|
var sum = Complex32.Zero; |
|
|
|
for (var k1 = 0; k1 < k; ++k1) |
|
|
|
{ |
|
|
|
Complex32 s = 0; |
|
|
|
for (var l = 0; l != columnsA; l++) |
|
|
|
{ |
|
|
|
s += adata[(l * rowsA) + i] * bdata[(l * rowsB) + j]; |
|
|
|
} |
|
|
|
|
|
|
|
c[jIndex + i] = s; |
|
|
|
sum += matrixA[(matArowPos * constK) + k1 + shiftAcol] * |
|
|
|
matrixB[((k1 + shiftBrow) * constN) + matBcolPos].Conjugate(); |
|
|
|
} |
|
|
|
}); |
|
|
|
|
|
|
|
result[((n1 + shiftCcol) * constM) + matCrowPos] += alpha * sum; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
CommonParallel.For( |
|
|
|
0, |
|
|
|
columnsB, |
|
|
|
j => |
|
|
|
for (var m1 = 0; m1 < m; m1++) |
|
|
|
{ |
|
|
|
var matArowPos = m1 + shiftArow; |
|
|
|
var matCrowPos = m1 + shiftCrow; |
|
|
|
for (var n1 = 0; n1 < n; ++n1) |
|
|
|
{ |
|
|
|
var jcIndex = j * rowsC; |
|
|
|
var jbIndex = j * rowsB; |
|
|
|
for (var i = 0; i != rowsA; i++) |
|
|
|
var matBcolPos = n1 + shiftBcol; |
|
|
|
var sum = Complex32.Zero; |
|
|
|
for (var k1 = 0; k1 < k; ++k1) |
|
|
|
{ |
|
|
|
Complex32 s = 0; |
|
|
|
for (var l = 0; l != columnsA; l++) |
|
|
|
{ |
|
|
|
s += adata[(l * rowsA) + i] * bdata[jbIndex + l]; |
|
|
|
} |
|
|
|
|
|
|
|
c[jcIndex + i] = s; |
|
|
|
sum += matrixA[(matArowPos * constK) + k1 + shiftAcol] * |
|
|
|
matrixB[((k1 + shiftBrow) * constN) + matBcolPos]; |
|
|
|
} |
|
|
|
}); |
|
|
|
|
|
|
|
result[((n1 + shiftCcol) * constM) + matCrowPos] += alpha * sum; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
else if ((int)transposeA > 111) |
|
|
|
{ |
|
|
|
if ((int)transposeA > 111 && (int)transposeB > 111) |
|
|
|
if ((int)transposeA > 112) |
|
|
|
{ |
|
|
|
CommonParallel.For( |
|
|
|
0, |
|
|
|
columnsA, |
|
|
|
j => |
|
|
|
{ |
|
|
|
var jIndex = j * rowsC; |
|
|
|
for (var i = 0; i != rowsB; i++) |
|
|
|
{ |
|
|
|
var iIndex = i * rowsA; |
|
|
|
Complex32 s = 0; |
|
|
|
for (var l = 0; l != columnsB; l++) |
|
|
|
{ |
|
|
|
s += adata[iIndex + l] * bdata[(l * rowsB) + j]; |
|
|
|
} |
|
|
|
|
|
|
|
c[jIndex + i] = (c[jIndex + i] * beta) + s; |
|
|
|
} |
|
|
|
}); |
|
|
|
} |
|
|
|
else if ((int)transposeA > 111) |
|
|
|
{ |
|
|
|
CommonParallel.For( |
|
|
|
0, |
|
|
|
columnsB, |
|
|
|
j => |
|
|
|
for (var m1 = 0; m1 < m; m1++) |
|
|
|
{ |
|
|
|
var matArowPos = m1 + shiftArow; |
|
|
|
var matCrowPos = m1 + shiftCrow; |
|
|
|
for (var n1 = 0; n1 < n; ++n1) |
|
|
|
{ |
|
|
|
var jcIndex = j * rowsC; |
|
|
|
var jbIndex = j * rowsB; |
|
|
|
for (var i = 0; i != columnsA; i++) |
|
|
|
var matBcolPos = n1 + shiftBcol; |
|
|
|
var sum = Complex32.Zero; |
|
|
|
for (var k1 = 0; k1 < k; ++k1) |
|
|
|
{ |
|
|
|
var iIndex = i * rowsA; |
|
|
|
Complex32 s = 0; |
|
|
|
for (var l = 0; l != rowsA; l++) |
|
|
|
{ |
|
|
|
s += adata[iIndex + l] * bdata[jbIndex + l]; |
|
|
|
} |
|
|
|
|
|
|
|
c[jcIndex + i] = s + (c[jcIndex + i] * beta); |
|
|
|
sum += matrixA[(matArowPos * constK) + k1 + shiftAcol].Conjugate() * |
|
|
|
matrixB[(matBcolPos * constK) + k1 + shiftBrow]; |
|
|
|
} |
|
|
|
}); |
|
|
|
} |
|
|
|
else if ((int)transposeB > 111) |
|
|
|
{ |
|
|
|
CommonParallel.For( |
|
|
|
0, |
|
|
|
rowsB, |
|
|
|
j => |
|
|
|
{ |
|
|
|
var jIndex = j * rowsC; |
|
|
|
for (var i = 0; i != rowsA; i++) |
|
|
|
{ |
|
|
|
Complex32 s = 0; |
|
|
|
for (var l = 0; l != columnsA; l++) |
|
|
|
{ |
|
|
|
s += adata[(l * rowsA) + i] * bdata[(l * rowsB) + j]; |
|
|
|
} |
|
|
|
|
|
|
|
c[jIndex + i] = s + (c[jIndex + i] * beta); |
|
|
|
} |
|
|
|
}); |
|
|
|
result[((n1 + shiftCcol) * constM) + matCrowPos] += alpha * sum; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
CommonParallel.For( |
|
|
|
0, |
|
|
|
columnsB, |
|
|
|
j => |
|
|
|
for (var m1 = 0; m1 < m; m1++) |
|
|
|
{ |
|
|
|
var matArowPos = m1 + shiftArow; |
|
|
|
var matCrowPos = m1 + shiftCrow; |
|
|
|
for (var n1 = 0; n1 < n; ++n1) |
|
|
|
{ |
|
|
|
var jcIndex = j * rowsC; |
|
|
|
var jbIndex = j * rowsB; |
|
|
|
for (var i = 0; i != rowsA; i++) |
|
|
|
var matBcolPos = n1 + shiftBcol; |
|
|
|
var sum = Complex32.Zero; |
|
|
|
for (var k1 = 0; k1 < k; ++k1) |
|
|
|
{ |
|
|
|
Complex32 s = 0; |
|
|
|
for (var l = 0; l != columnsA; l++) |
|
|
|
{ |
|
|
|
s += adata[(l * rowsA) + i] * bdata[jbIndex + l]; |
|
|
|
} |
|
|
|
|
|
|
|
c[jcIndex + i] = s + (c[jcIndex + i] * beta); |
|
|
|
sum += matrixA[(matArowPos * constK) + k1 + shiftAcol] * |
|
|
|
matrixB[(matBcolPos * constK) + k1 + shiftBrow]; |
|
|
|
} |
|
|
|
}); |
|
|
|
|
|
|
|
result[((n1 + shiftCcol) * constM) + matCrowPos] += alpha * sum; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
if ((int)transposeA > 111 && (int)transposeB > 111) |
|
|
|
else if ((int)transposeB > 111) |
|
|
|
{ |
|
|
|
CommonParallel.For( |
|
|
|
0, |
|
|
|
columnsA, |
|
|
|
j => |
|
|
|
if ((int)transposeB > 112) |
|
|
|
{ |
|
|
|
for (var m1 = 0; m1 < m; m1++) |
|
|
|
{ |
|
|
|
var jIndex = j * rowsC; |
|
|
|
for (var i = 0; i != rowsB; i++) |
|
|
|
var matArowPos = m1 + shiftArow; |
|
|
|
var matCrowPos = m1 + shiftCrow; |
|
|
|
for (var n1 = 0; n1 < n; ++n1) |
|
|
|
{ |
|
|
|
var iIndex = i * rowsA; |
|
|
|
Complex32 s = 0; |
|
|
|
for (var l = 0; l != columnsB; l++) |
|
|
|
var matBcolPos = n1 + shiftBcol; |
|
|
|
var sum = Complex32.Zero; |
|
|
|
for (var k1 = 0; k1 < k; ++k1) |
|
|
|
{ |
|
|
|
s += adata[iIndex + l] * bdata[(l * rowsB) + j]; |
|
|
|
sum += matrixA[((k1 + shiftAcol) * constM) + matArowPos] * |
|
|
|
matrixB[((k1 + shiftBrow) * constN) + matBcolPos].Conjugate(); |
|
|
|
} |
|
|
|
|
|
|
|
c[jIndex + i] = (c[jIndex + i] * beta) + (alpha * s); |
|
|
|
result[((n1 + shiftCcol) * constM) + matCrowPos] += alpha * sum; |
|
|
|
} |
|
|
|
}); |
|
|
|
} |
|
|
|
else if ((int)transposeA > 111) |
|
|
|
{ |
|
|
|
CommonParallel.For( |
|
|
|
0, |
|
|
|
columnsB, |
|
|
|
j => |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
for (var m1 = 0; m1 < m; m1++) |
|
|
|
{ |
|
|
|
var jcIndex = j * rowsC; |
|
|
|
var jbIndex = j * rowsB; |
|
|
|
for (var i = 0; i != columnsA; i++) |
|
|
|
var matArowPos = m1 + shiftArow; |
|
|
|
var matCrowPos = m1 + shiftCrow; |
|
|
|
for (var n1 = 0; n1 < n; ++n1) |
|
|
|
{ |
|
|
|
var iIndex = i * rowsA; |
|
|
|
Complex32 s = 0; |
|
|
|
for (var l = 0; l != rowsA; l++) |
|
|
|
var matBcolPos = n1 + shiftBcol; |
|
|
|
var sum = Complex32.Zero; |
|
|
|
for (var k1 = 0; k1 < k; ++k1) |
|
|
|
{ |
|
|
|
s += adata[iIndex + l] * bdata[jbIndex + l]; |
|
|
|
sum += matrixA[((k1 + shiftAcol) * constM) + matArowPos] * |
|
|
|
matrixB[((k1 + shiftBrow) * constN) + matBcolPos]; |
|
|
|
} |
|
|
|
|
|
|
|
c[jcIndex + i] = (alpha * s) + (c[jcIndex + i] * beta); |
|
|
|
result[((n1 + shiftCcol) * constM) + matCrowPos] += alpha * sum; |
|
|
|
} |
|
|
|
}); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
else if ((int)transposeB > 111) |
|
|
|
else |
|
|
|
{ |
|
|
|
CommonParallel.For( |
|
|
|
0, |
|
|
|
rowsB, |
|
|
|
j => |
|
|
|
for (var m1 = 0; m1 < m; m1++) |
|
|
|
{ |
|
|
|
var matArowPos = m1 + shiftArow; |
|
|
|
var matCrowPos = m1 + shiftCrow; |
|
|
|
for (var n1 = 0; n1 < n; ++n1) |
|
|
|
{ |
|
|
|
var jIndex = j * rowsC; |
|
|
|
for (var i = 0; i != rowsA; i++) |
|
|
|
var matBcolPos = n1 + shiftBcol; |
|
|
|
var sum = Complex32.Zero; |
|
|
|
for (var k1 = 0; k1 < k; ++k1) |
|
|
|
{ |
|
|
|
Complex32 s = 0; |
|
|
|
for (var l = 0; l != columnsA; l++) |
|
|
|
{ |
|
|
|
s += adata[(l * rowsA) + i] * bdata[(l * rowsB) + j]; |
|
|
|
} |
|
|
|
|
|
|
|
c[jIndex + i] = (alpha * s) + (c[jIndex + i] * beta); |
|
|
|
sum += matrixA[((k1 + shiftAcol) * constM) + matArowPos] * |
|
|
|
matrixB[(matBcolPos * constK) + k1 + shiftBrow]; |
|
|
|
} |
|
|
|
}); |
|
|
|
|
|
|
|
result[((n1 + shiftCcol) * constM) + matCrowPos] += alpha * sum; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
// divide and conquer
|
|
|
|
int m2 = m / 2, n2 = n / 2, k2 = k / 2; |
|
|
|
|
|
|
|
if (first) |
|
|
|
{ |
|
|
|
CommonParallel.Invoke( |
|
|
|
() => CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow, shiftAcol, matrixB, shiftBrow, shiftBcol, result, shiftCrow, shiftCcol, m2, n2, k2, constM, constN, constK, false), |
|
|
|
() => CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow, shiftAcol, matrixB, shiftBrow, shiftBcol + n2, result, shiftCrow, shiftCcol + n2, m2, n - n2, k2, constM, constN, constK, false), |
|
|
|
() => CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol, result, shiftCrow, shiftCcol, m2, n2, k - k2, constM, constN, constK, false), |
|
|
|
() => CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol + n2, result, shiftCrow, shiftCcol + n2, m2, n - n2, k - k2, constM, constN, constK, false)); |
|
|
|
|
|
|
|
CommonParallel.Invoke( |
|
|
|
() => CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow + m2, shiftAcol, matrixB, shiftBrow, shiftBcol, result, shiftCrow + m2, shiftCcol, m - m2, n2, k2, constM, constN, constK, false), |
|
|
|
() => CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow + m2, shiftAcol, matrixB, shiftBrow, shiftBcol + n2, result, shiftCrow + m2, shiftCcol + n2, m - m2, n - n2, k2, constM, constN, constK, false), |
|
|
|
() => CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow + m2, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol, result, shiftCrow + m2, shiftCcol, m - m2, n2, k - k2, constM, constN, constK, false), |
|
|
|
() => CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow + m2, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol + n2, result, shiftCrow + m2, shiftCcol + n2, m - m2, n - n2, k - k2, constM, constN, constK, false)); |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
CommonParallel.For( |
|
|
|
0, |
|
|
|
columnsB, |
|
|
|
j => |
|
|
|
{ |
|
|
|
var jcIndex = j * rowsC; |
|
|
|
var jbIndex = j * rowsB; |
|
|
|
for (var i = 0; i != rowsA; i++) |
|
|
|
{ |
|
|
|
Complex32 s = 0; |
|
|
|
for (var l = 0; l != columnsA; l++) |
|
|
|
{ |
|
|
|
s += adata[(l * rowsA) + i] * bdata[jbIndex + l]; |
|
|
|
} |
|
|
|
CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow, shiftAcol, matrixB, shiftBrow, shiftBcol, result, shiftCrow, shiftCcol, m2, n2, k2, constM, constN, constK, false); |
|
|
|
CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow, shiftAcol, matrixB, shiftBrow, shiftBcol + n2, result, shiftCrow, shiftCcol + n2, m2, n - n2, k2, constM, constN, constK, false); |
|
|
|
|
|
|
|
c[jcIndex + i] = (alpha * s) + (c[jcIndex + i] * beta); |
|
|
|
} |
|
|
|
}); |
|
|
|
CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol, result, shiftCrow, shiftCcol, m2, n2, k - k2, constM, constN, constK, false); |
|
|
|
CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol + n2, result, shiftCrow, shiftCcol + n2, m2, n - n2, k - k2, constM, constN, constK, false); |
|
|
|
|
|
|
|
CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow + m2, shiftAcol, matrixB, shiftBrow, shiftBcol, result, shiftCrow + m2, shiftCcol, m - m2, n2, k2, constM, constN, constK, false); |
|
|
|
CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow + m2, shiftAcol, matrixB, shiftBrow, shiftBcol + n2, result, shiftCrow + m2, shiftCcol + n2, m - m2, n - n2, k2, constM, constN, constK, false); |
|
|
|
|
|
|
|
CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow + m2, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol, result, shiftCrow + m2, shiftCcol, m - m2, n2, k - k2, constM, constN, constK, false); |
|
|
|
CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, matrixA, shiftArow + m2, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol + n2, result, shiftCrow + m2, shiftCcol + n2, m - m2, n - n2, k - k2, constM, constN, constK, false); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|