Compare commits

...

5 Commits
master ... cuda

  1. 24
      MathNet.Numerics.NativeProviders.sln
  2. 153
      src/NativeProviders/CUDA/blas.cpp
  3. 63
      src/NativeProviders/CUDA/capabilities.cpp
  4. 560
      src/NativeProviders/CUDA/lapack.cpp
  5. 0
      src/NativeProviders/CUDA/memory.c
  6. 18
      src/NativeProviders/CUDA/wrapper_cuda.h
  7. 1
      src/NativeProviders/Windows/CUDA/CUDAWrapper.vcxproj
  8. 3
      src/NativeProviders/Windows/CUDA/CUDAWrapper.vcxproj.filters
  9. 4
      src/Numerics/Control.cs
  10. 4
      src/Numerics/Numerics.csproj
  11. 110
      src/Numerics/Providers/LinearAlgebra/Cuda/CuSolverException.cs
  12. 102
      src/Numerics/Providers/LinearAlgebra/Cuda/CublasException.cs
  13. 90
      src/Numerics/Providers/LinearAlgebra/Cuda/CudaException.cs
  14. 53
      src/Numerics/Providers/LinearAlgebra/Cuda/CudaLinearAlgebraProvider.Complex.cs
  15. 53
      src/Numerics/Providers/LinearAlgebra/Cuda/CudaLinearAlgebraProvider.Complex32.cs
  16. 53
      src/Numerics/Providers/LinearAlgebra/Cuda/CudaLinearAlgebraProvider.Double.cs
  17. 53
      src/Numerics/Providers/LinearAlgebra/Cuda/CudaLinearAlgebraProvider.Single.cs
  18. 100
      src/Numerics/Providers/LinearAlgebra/Cuda/CudaLinearAlgebraProvider.cs
  19. 58
      src/Numerics/Providers/LinearAlgebra/Cuda/CudaResults.cs
  20. 112
      src/Numerics/Providers/LinearAlgebra/Cuda/SafeNativeMethods.cs

24
MathNet.Numerics.NativeProviders.sln

@ -170,6 +170,7 @@ Global
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Debug|Win32.ActiveCfg = Debug|Any CPU
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Debug|x64.ActiveCfg = Debug|Any CPU
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Debug|x64.Build.0 = Debug|Any CPU
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Release|Any CPU.Build.0 = Release|Any CPU
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
@ -196,6 +197,7 @@ Global
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Release-OpenBLAS|Mixed Platforms.Build.0 = Release|Any CPU
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Release-OpenBLAS|Win32.ActiveCfg = Release|Any CPU
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Release-OpenBLAS|x64.ActiveCfg = Release|Any CPU
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Release|x64.Build.0 = Release|Any CPU
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Release-Signed|Any CPU.ActiveCfg = Release|Any CPU
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Release-Signed|Any CPU.Build.0 = Release|Any CPU
{3515A344-AB5F-41C7-A14C-04A79B3FFAB1}.Release-Signed|Mixed Platforms.ActiveCfg = Release|Any CPU
@ -245,6 +247,7 @@ Global
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Debug|Win32.ActiveCfg = Debug|Any CPU
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Debug|x64.ActiveCfg = Debug|Any CPU
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Debug|x64.Build.0 = Debug|Any CPU
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Release|Any CPU.Build.0 = Release|Any CPU
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
@ -271,6 +274,7 @@ Global
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Release-OpenBLAS|Mixed Platforms.Build.0 = Release|Any CPU
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Release-OpenBLAS|Win32.ActiveCfg = Release|Any CPU
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Release-OpenBLAS|x64.ActiveCfg = Release|Any CPU
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Release|x64.Build.0 = Release|Any CPU
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Release-Signed|Any CPU.ActiveCfg = Release|Any CPU
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Release-Signed|Any CPU.Build.0 = Release|Any CPU
{E79C0395-01DC-4BC9-B86C-ED45790892C5}.Release-Signed|Mixed Platforms.ActiveCfg = Release|Any CPU
@ -344,6 +348,26 @@ Global
{96B903EF-3EE1-4569-803C-0482D2F5ED37}.Release-Signed|Mixed Platforms.Build.0 = Release|Any CPU
{96B903EF-3EE1-4569-803C-0482D2F5ED37}.Release-Signed|Win32.ActiveCfg = Release|Any CPU
{96B903EF-3EE1-4569-803C-0482D2F5ED37}.Release-Signed|x64.ActiveCfg = Release|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Debug|Any CPU.Build.0 = Debug|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Debug|Win32.ActiveCfg = Debug|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Debug|x64.ActiveCfg = Debug|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Debug|x64.Build.0 = Debug|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Release|Any CPU.ActiveCfg = Release|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Release|Any CPU.Build.0 = Release|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Release|Mixed Platforms.Build.0 = Release|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Release|Win32.ActiveCfg = Release|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Release|x64.ActiveCfg = Release|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Release|x64.Build.0 = Release|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Release-Signed|Any CPU.ActiveCfg = Release|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Release-Signed|Any CPU.Build.0 = Release|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Release-Signed|Mixed Platforms.ActiveCfg = Release|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Release-Signed|Mixed Platforms.Build.0 = Release|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Release-Signed|Win32.ActiveCfg = Release|Any CPU
{2386FAD1-BB99-4597-885C-8EF81D0637BA}.Release-Signed|x64.ActiveCfg = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

153
src/NativeProviders/CUDA/blas.cpp

@ -1,167 +1,176 @@
#include <stdio.h>
#include "cublas_v2.h"
#include "cuda_runtime.h"
#include "wrapper_common.h"
#include "wrapper_cuda.h"
template<typename T, typename AXPY>
void cuda_axpy(const cublasHandle_t blasHandle, const int n, const T alpha, const T x[], int incX, T y[], int incY, AXPY axpy)
CudaResults cuda_axpy(const cublasHandle_t blasHandle, const int n, const T alpha, const T x[], int incX, T y[], int incY, AXPY axpy)
{
T *d_X = NULL;
T *d_Y = NULL;
cudaMalloc((void**)&d_X, n*sizeof(T));
cudaMalloc((void**)&d_Y, n*sizeof(T));
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
cublasSetVector(n, sizeof(T), x, incX, d_X, incX);
cublasSetVector(n, sizeof(T), y, incY, d_Y, incY);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_X, n*sizeof(T)));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_Y, n*sizeof(T)));
axpy(blasHandle, n, &alpha, d_X, incX, d_Y, incX);
SAFECUDACALL(results.blasStatus, cublasSetVector(n, sizeof(T), x, incX, d_X, incX));
SAFECUDACALL(results.blasStatus, cublasSetVector(n, sizeof(T), y, incY, d_Y, incY));
cublasGetVector(n, sizeof(T), d_Y, incY, y, incY);
SAFECUDACALL(results.blasStatus, axpy(blasHandle, n, &alpha, d_X, incX, d_Y, incX));
SAFECUDACALL(results.blasStatus, cublasGetVector(n, sizeof(T), d_Y, incY, y, incY));
exit:
cudaFree(d_X);
cudaFree(d_Y);
return results;
}
template<typename T, typename SCAL>
void cuda_scal(const cublasHandle_t blasHandle, const int n, const T alpha, T x[], int incX, SCAL scal)
CudaResults cuda_scal(const cublasHandle_t blasHandle, const int n, const T alpha, T x[], int incX, SCAL scal)
{
T *d_X = NULL;
cudaMalloc((void**)&d_X, n*sizeof(T));
cublasSetVector(n, sizeof(T), x, incX, d_X, incX);
scal(blasHandle, n, &alpha, d_X, incX);
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
cublasGetVector(n, sizeof(T), d_X, incX, x, incX);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_X, n*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetVector(n, sizeof(T), x, incX, d_X, incX));
SAFECUDACALL(results.blasStatus, scal(blasHandle, n, &alpha, d_X, incX));
SAFECUDACALL(results.blasStatus, cublasGetVector(n, sizeof(T), d_X, incX, x, incX));
exit:
cudaFree(d_X);
return results;
}
template<typename T, typename DOT>
void cuda_dot(const cublasHandle_t blasHandle, const int n, const T x[], int incX, const T y[], int incY, T* result, DOT dot)
CudaResults cuda_dot(const cublasHandle_t blasHandle, const int n, const T x[], int incX, const T y[], int incY, T* result, DOT dot)
{
T *d_X = NULL;
T *d_Y = NULL;
cudaMalloc((void**)&d_X, n*sizeof(T));
cudaMalloc((void**)&d_Y, n*sizeof(T));
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
SAFECUDACALL(results.error, cudaMalloc((void**)&d_X, n*sizeof(T)));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_Y, n*sizeof(T)));
cublasSetVector(n, sizeof(T), x, incX, d_X, incX);
cublasSetVector(n, sizeof(T), y, incY, d_Y, incY);
SAFECUDACALL(results.blasStatus, cublasSetVector(n, sizeof(T), x, incX, d_X, incX));
SAFECUDACALL(results.blasStatus, cublasSetVector(n, sizeof(T), y, incY, d_Y, incY));
dot(blasHandle, n, d_X, incX, d_Y, incY, result);
SAFECUDACALL(results.blasStatus, dot(blasHandle, n, d_X, incX, d_Y, incY, result));
exit:
cudaFree(d_X);
cudaFree(d_Y);
return results;
}
template<typename T, typename GEMM>
void cuda_gemm(const cublasHandle_t handle, const cublasOperation_t transa, const cublasOperation_t transb, int m, int n, int k, const T alpha, const T A[], int lda, const T B[], int ldb, const T beta, T C[], int ldc, GEMM gemm)
CudaResults cuda_gemm(const cublasHandle_t handle, const cublasOperation_t transa, const cublasOperation_t transb, int m, int n, int k, const T alpha, const T A[], int lda, const T B[], int ldb, const T beta, T C[], int ldc, GEMM gemm)
{
T *d_A = NULL;
cudaMalloc((void**)&d_A, m*k*sizeof(T));
cublasSetMatrix(m, k, sizeof(T), A, m, d_A, m);
T *d_B = NULL;
cudaMalloc((void**)&d_B, k*n*sizeof(T));
cublasSetMatrix(k, n, sizeof(T), B, k, d_B, k);
T *d_C = NULL;
cudaMalloc((void**)&d_C, m*n*sizeof(T));
cublasSetMatrix(m, n, sizeof(T), C, m, d_C, m);
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
SAFECUDACALL(results.error, cudaMalloc((void**)&d_A, m*k*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(m, k, sizeof(T), A, m, d_A, m));
gemm(handle, transa, transb, m, n, k, &alpha, d_A, lda, d_B, ldb, &beta, d_C, ldc);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_B, k*n*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(k, n, sizeof(T), B, k, d_B, k));
cublasGetMatrix(m, n, sizeof(T), d_C, m, C, m);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_C, m*n*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(m, n, sizeof(T), C, m, d_C, m));
SAFECUDACALL(results.blasStatus, gemm(handle, transa, transb, m, n, k, &alpha, d_A, lda, d_B, ldb, &beta, d_C, ldc));
SAFECUDACALL(results.blasStatus, cublasGetMatrix(m, n, sizeof(T), d_C, m, C, m));
exit:
cudaFree(d_A);
cudaFree(d_B);
cudaFree(d_C);
return results;
}
extern "C" {
DLLEXPORT void s_axpy(const cublasHandle_t blasHandle, const int n, const float alpha, const float x[], float y[]){
cuda_axpy(blasHandle, n, alpha, x, 1, y, 1, cublasSaxpy);
DLLEXPORT CudaResults s_axpy(const cublasHandle_t blasHandle, const int n, const float alpha, const float x[], float y[]){
return cuda_axpy(blasHandle, n, alpha, x, 1, y, 1, cublasSaxpy);
}
DLLEXPORT void d_axpy(const cublasHandle_t blasHandle, const int n, const double alpha, const double x[], double y[]){
cuda_axpy(blasHandle, n, alpha, x, 1, y, 1, cublasDaxpy);
DLLEXPORT CudaResults d_axpy(const cublasHandle_t blasHandle, const int n, const double alpha, const double x[], double y[]){
return cuda_axpy(blasHandle, n, alpha, x, 1, y, 1, cublasDaxpy);
}
DLLEXPORT void c_axpy(const cublasHandle_t blasHandle, const int n, const cuComplex alpha, const cuComplex x[], cuComplex y[]){
cuda_axpy(blasHandle, n, alpha, x, 1, y, 1, cublasCaxpy);
DLLEXPORT CudaResults c_axpy(const cublasHandle_t blasHandle, const int n, const cuComplex alpha, const cuComplex x[], cuComplex y[]){
return cuda_axpy(blasHandle, n, alpha, x, 1, y, 1, cublasCaxpy);
}
DLLEXPORT void z_axpy(const cublasHandle_t blasHandle, const int n, const cuDoubleComplex alpha, const cuDoubleComplex x[], cuDoubleComplex y[]){
cuda_axpy(blasHandle, n, alpha, x, 1, y, 1, cublasZaxpy);
DLLEXPORT CudaResults z_axpy(const cublasHandle_t blasHandle, const int n, const cuDoubleComplex alpha, const cuDoubleComplex x[], cuDoubleComplex y[]){
return cuda_axpy(blasHandle, n, alpha, x, 1, y, 1, cublasZaxpy);
}
DLLEXPORT void s_scale(const cublasHandle_t blasHandle, const int n, const float alpha, float x[]){
cuda_scal(blasHandle, n, alpha, x, 1, cublasSscal);
DLLEXPORT CudaResults s_scale(const cublasHandle_t blasHandle, const int n, const float alpha, float x[]){
return cuda_scal(blasHandle, n, alpha, x, 1, cublasSscal);
}
DLLEXPORT void d_scale(const cublasHandle_t blasHandle, const int n, const double alpha, double x[]){
cuda_scal(blasHandle, n, alpha, x, 1, cublasDscal);
DLLEXPORT CudaResults d_scale(const cublasHandle_t blasHandle, const int n, const double alpha, double x[]){
return cuda_scal(blasHandle, n, alpha, x, 1, cublasDscal);
}
DLLEXPORT void c_scale(const cublasHandle_t blasHandle, const int n, const cuComplex alpha, cuComplex x[]){
cuda_scal(blasHandle, n, alpha, x, 1, cublasCscal);
DLLEXPORT CudaResults c_scale(const cublasHandle_t blasHandle, const int n, const cuComplex alpha, cuComplex x[]){
return cuda_scal(blasHandle, n, alpha, x, 1, cublasCscal);
}
DLLEXPORT void z_scale(const cublasHandle_t blasHandle, const int n, const cuDoubleComplex alpha, cuDoubleComplex x[]){
cuda_scal(blasHandle, n, alpha, x, 1, cublasZscal);
DLLEXPORT CudaResults z_scale(const cublasHandle_t blasHandle, const int n, const cuDoubleComplex alpha, cuDoubleComplex x[]){
return cuda_scal(blasHandle, n, alpha, x, 1, cublasZscal);
}
DLLEXPORT float s_dot_product(const cublasHandle_t blasHandle, const int n, const float x[], const float y[]){
float ret;
cuda_dot(blasHandle, n, x, 1, y, 1, &ret, cublasSdot);
return ret;
DLLEXPORT CudaResults s_dot_product(const cublasHandle_t blasHandle, const int n, const float x[], const float y[], float *result){
return cuda_dot(blasHandle, n, x, 1, y, 1, result, cublasSdot);
}
DLLEXPORT double d_dot_product(const cublasHandle_t blasHandle, const int n, const double x[], const double y[]){
double ret;
cuda_dot(blasHandle, n, x, 1, y, 1, &ret, cublasDdot);
return ret;
DLLEXPORT CudaResults d_dot_product(const cublasHandle_t blasHandle, const int n, const double x[], const double y[], double *result){
return cuda_dot(blasHandle, n, x, 1, y, 1, result, cublasDdot);
}
DLLEXPORT cuComplex c_dot_product(const cublasHandle_t blasHandle, const int n, const cuComplex x[], const cuComplex y[]){
cuComplex ret;
cuda_dot(blasHandle, n, x, 1, y, 1, &ret, cublasCdotu);
return ret;
DLLEXPORT CudaResults c_dot_product(const cublasHandle_t blasHandle, const int n, const cuComplex x[], const cuComplex y[], cuComplex *result){
return cuda_dot(blasHandle, n, x, 1, y, 1, result, cublasCdotu);
}
DLLEXPORT cuDoubleComplex z_dot_product(const cublasHandle_t blasHandle, const int n, const cuDoubleComplex x[], const cuDoubleComplex y[]){
cuDoubleComplex ret;
cuda_dot(blasHandle, n, x, 1, y, 1, &ret, cublasZdotu);
return ret;
DLLEXPORT CudaResults z_dot_product(const cublasHandle_t blasHandle, const int n, const cuDoubleComplex x[], const cuDoubleComplex y[], cuDoubleComplex *result){
return cuda_dot(blasHandle, n, x, 1, y, 1, result, cublasZdotu);
}
DLLEXPORT void s_matrix_multiply(const cublasHandle_t blasHandle, cublasOperation_t transA, cublasOperation_t transB, const int m, const int n, const int k, const float alpha, const float x[], const float y[], const float beta, float c[]){
DLLEXPORT CudaResults s_matrix_multiply(const cublasHandle_t blasHandle, cublasOperation_t transA, cublasOperation_t transB, const int m, const int n, const int k, const float alpha, const float x[], const float y[], const float beta, float c[]){
int lda = transA == CUBLAS_OP_N ? m : k;
int ldb = transB == CUBLAS_OP_N ? k : n;
cuda_gemm(blasHandle, transA, transB, m, n, k, alpha, x, lda, y, ldb, beta, c, m, cublasSgemm);
return cuda_gemm(blasHandle, transA, transB, m, n, k, alpha, x, lda, y, ldb, beta, c, m, cublasSgemm);
}
DLLEXPORT void d_matrix_multiply(const cublasHandle_t blasHandle, cublasOperation_t transA, cublasOperation_t transB, const int m, const int n, const int k, const double alpha, const double x[], const double y[], const double beta, double c[]){
DLLEXPORT CudaResults d_matrix_multiply(const cublasHandle_t blasHandle, cublasOperation_t transA, cublasOperation_t transB, const int m, const int n, const int k, const double alpha, const double x[], const double y[], const double beta, double c[]){
int lda = transA == CUBLAS_OP_N ? m : k;
int ldb = transB == CUBLAS_OP_N ? k : n;
cuda_gemm(blasHandle, transA, transB, m, n, k, alpha, x, lda, y, ldb, beta, c, m, cublasDgemm);
return cuda_gemm(blasHandle, transA, transB, m, n, k, alpha, x, lda, y, ldb, beta, c, m, cublasDgemm);
}
DLLEXPORT void c_matrix_multiply(const cublasHandle_t blasHandle, cublasOperation_t transA, cublasOperation_t transB, const int m, const int n, const int k, const cuComplex alpha, const cuComplex x[], const cuComplex y[], const cuComplex beta, cuComplex c[]){
DLLEXPORT CudaResults c_matrix_multiply(const cublasHandle_t blasHandle, cublasOperation_t transA, cublasOperation_t transB, const int m, const int n, const int k, const cuComplex alpha, const cuComplex x[], const cuComplex y[], const cuComplex beta, cuComplex c[]){
int lda = transA == CUBLAS_OP_N ? m : k;
int ldb = transB == CUBLAS_OP_N ? k : n;
cuda_gemm(blasHandle, transA, transB, m, n, k, alpha, x, lda, y, ldb, beta, c, m, cublasCgemm);
return cuda_gemm(blasHandle, transA, transB, m, n, k, alpha, x, lda, y, ldb, beta, c, m, cublasCgemm);
}
DLLEXPORT void z_matrix_multiply(const cublasHandle_t blasHandle, cublasOperation_t transA, cublasOperation_t transB, const int m, const int n, const int k, const cuDoubleComplex alpha, const cuDoubleComplex x[], const cuDoubleComplex y[], const cuDoubleComplex beta, cuDoubleComplex c[]){
DLLEXPORT CudaResults z_matrix_multiply(const cublasHandle_t blasHandle, cublasOperation_t transA, cublasOperation_t transB, const int m, const int n, const int k, const cuDoubleComplex alpha, const cuDoubleComplex x[], const cuDoubleComplex y[], const cuDoubleComplex beta, cuDoubleComplex c[]){
int lda = transA == CUBLAS_OP_N ? m : k;
int ldb = transB == CUBLAS_OP_N ? k : n;
cuda_gemm(blasHandle, transA, transB, m, n, k, alpha, x, lda, y, ldb, beta, c, m, cublasZgemm);
return cuda_gemm(blasHandle, transA, transB, m, n, k, alpha, x, lda, y, ldb, beta, c, m, cublasZgemm);
}
}

63
src/NativeProviders/CUDA/capabilities.cpp

@ -1,4 +1,7 @@
#include "wrapper_common.h"
#include <stdio.h>
#include "wrapper_cuda.h"
#include "cuda_runtime.h"
#include "cublas_v2.h"
#include "cusolverDn.h"
@ -14,6 +17,10 @@ extern "C" {
*/
DLLEXPORT int query_capability(const int capability)
{
int count;
int device;
cudaDeviceProp prop;
switch (capability)
{
@ -42,10 +49,36 @@ extern "C" {
#endif
// COMMON/SHARED
case 64: return 1; // revision
case 64:
if (cudaGetDeviceCount(&count))
return 0;
if (count == 0)
return 0;
if (cudaGetDevice(&device))
return 0;
if (cudaGetDeviceProperties(&prop, device))
return 0;
return prop.major;
// LINEAR ALGEBRA
case 128: return 1; // basic dense linear algebra
case 128:
if (cudaGetDeviceCount(&count))
return 0;
if (count == 0)
return 0;
if (cudaGetDevice(&device))
return 0;
if (cudaGetDeviceProperties(&prop, device))
return 0;
return prop.major >= 2;
// OPTIMIZATION
case 256: return 0; // basic optimization
@ -58,20 +91,28 @@ extern "C" {
}
}
DLLEXPORT cublasStatus_t createBLASHandle(cublasHandle_t *blasHandle){
return cublasCreate(blasHandle);
DLLEXPORT CudaResults createBLASHandle(cublasHandle_t *blasHandle){
CudaResults ret = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
ret.blasStatus = cublasCreate(blasHandle);
return ret;
}
DLLEXPORT cublasStatus_t destroyBLASHandle(cublasHandle_t blasHandle){
return cublasDestroy(blasHandle);
DLLEXPORT CudaResults destroyBLASHandle(cublasHandle_t blasHandle){
CudaResults ret = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
ret.blasStatus = cublasDestroy(blasHandle);
return ret;
}
DLLEXPORT cusolverStatus_t createSolverHandle(cusolverDnHandle_t *solverHandle){
return cusolverDnCreate(solverHandle);
DLLEXPORT CudaResults createSolverHandle(cusolverDnHandle_t *solverHandle){
CudaResults ret = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
ret.solverStatus = cusolverDnCreate(solverHandle);
return ret;
}
DLLEXPORT cusolverStatus_t destroySolverHandle(cusolverDnHandle_t solverHandle){
return cusolverDnDestroy(solverHandle);
DLLEXPORT CudaResults destroySolverHandle(cusolverDnHandle_t solverHandle){
CudaResults ret = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
ret.solverStatus = cusolverDnDestroy(solverHandle);
return ret;
}
#ifdef __cplusplus

560
src/NativeProviders/CUDA/lapack.cpp

@ -1,97 +1,102 @@
#include <algorithm>
#include "lapack_common.h"
#include "wrapper_common.h"
#include "wrapper_cuda.h"
#include "cublas_v2.h"
#include "cusolverDn.h"
#include "cuda_runtime.h"
template<typename T, typename GETRF, typename GETRFBSIZE>
inline int lu_factor(cusolverDnHandle_t solverHandle, int m, T a[], int ipiv[], GETRF getrf, GETRFBSIZE getrfbsize)
inline CudaResults lu_factor(cusolverDnHandle_t solverHandle, int m, T a[], int ipiv[], GETRF getrf, GETRFBSIZE getrfbsize, int *info)
{
int info = 0;
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
T* d_A = NULL;
cudaMalloc((void**)&d_A, m*m*sizeof(T));
cublasSetMatrix(m, m, sizeof(T), a, m, d_A, m);
int* d_I = NULL;
cudaMalloc((void**)&d_I, m*sizeof(int));
T* work = NULL;
int* d_info = NULL;
SAFECUDACALL(results.error, cudaMalloc((void**)&d_A, m*m*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(m, m, sizeof(T), a, m, d_A, m));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_I, m*sizeof(int)));
int lwork = 0;
getrfbsize(solverHandle, m, m, a, m, &lwork);
cudaMalloc((void**)&work, sizeof(T)*lwork);
SAFECUDACALL(results.solverStatus, getrfbsize(solverHandle, m, m, a, m, &lwork));
SAFECUDACALL(results.error, cudaMalloc((void**)&work, sizeof(T)*lwork));
int* d_info = NULL;
cudaMalloc((void**)&d_info, sizeof(int));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_info, sizeof(int)));
getrf(solverHandle, m, m, d_A, m, work, d_I, d_info);
SAFECUDACALL(results.solverStatus, getrf(solverHandle, m, m, d_A, m, work, d_I, d_info));
cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
SAFECUDACALL(results.error, cudaMemcpy(info, d_info, sizeof(int), cudaMemcpyDeviceToHost));
cublasGetMatrix(m, m, sizeof(T), d_A, m, a, m);
cublasGetVector(m, sizeof(int), d_I, 1, ipiv, 1);
SAFECUDACALL(results.blasStatus, cublasGetMatrix(m, m, sizeof(T), d_A, m, a, m));
SAFECUDACALL(results.blasStatus, cublasGetVector(m, sizeof(int), d_I, 1, ipiv, 1));
shift_ipiv_down(m, ipiv);
exit:
cudaFree(d_A);
cudaFree(d_I);
cudaFree(d_info);
cudaFree(work);
return info;
return results;
};
template<typename T, typename GETRF, typename GETRIBATCHED, typename GETRFBSIZE>
inline int lu_inverse(cusolverDnHandle_t solverHandle, cublasHandle_t blasHandle, int n, T a[], GETRF getrf, GETRIBATCHED getribatched, GETRFBSIZE getrfbsize)
inline CudaResults lu_inverse(cusolverDnHandle_t solverHandle, cublasHandle_t blasHandle, int n, T a[], GETRF getrf, GETRIBATCHED getribatched, GETRFBSIZE getrfbsize, int *info)
{
int info = 0;
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
int* d_I = NULL;
cudaMalloc((void**)&d_I, n*sizeof(int));
T* d_A = NULL;
cudaMalloc((void**)&d_A, n*n*sizeof(T));
cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n);
T* work = NULL;
int* d_info = NULL;
T* d_C = NULL;
const T **d_Aarray = NULL;
T **d_Carray = NULL;
SAFECUDACALL(results.error, cudaMalloc((void**)&d_I, n*sizeof(int)));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_A, n*n*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n));
int lwork = 0;
getrfbsize(solverHandle, n, n, d_A, n, &lwork);
cudaMalloc((void**)&work, sizeof(T)*lwork);
SAFECUDACALL(results.solverStatus, getrfbsize(solverHandle, n, n, d_A, n, &lwork));
SAFECUDACALL(results.error, cudaMalloc((void**)&work, sizeof(T)*lwork));
int* d_info = NULL;
cudaMalloc((void**)&d_info, sizeof(int));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_info, sizeof(int)));
getrf(solverHandle, n, n, d_A, n, work, d_I, d_info);
cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
SAFECUDACALL(results.solverStatus, getrf(solverHandle, n, n, d_A, n, work, d_I, d_info));
SAFECUDACALL(results.error, cudaMemcpy(info, d_info, sizeof(int), cudaMemcpyDeviceToHost));
cudaFree(work);
if (info != 0)
if (*info != 0)
{
cudaFree(d_A);
cudaFree(d_I);
cudaFree(d_info);
return info;
return results;
}
T* d_C = NULL;
cudaMalloc((void**)&d_C, n*n*sizeof(T));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_C, n*n*sizeof(T)));
const T **d_Aarray = NULL;
cudaMalloc((void**)&d_Aarray, sizeof(T*));
cudaMemcpy(d_Aarray, &d_A, sizeof(T*), cudaMemcpyHostToDevice);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_Aarray, sizeof(T*)));
SAFECUDACALL(results.error, cudaMemcpy(d_Aarray, &d_A, sizeof(T*), cudaMemcpyHostToDevice));
T **d_Carray = NULL;
cudaMalloc((void**)&d_Carray, sizeof(T*));
cudaMemcpy(d_Carray, &d_C, sizeof(T*), cudaMemcpyHostToDevice);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_Carray, sizeof(T*)));
SAFECUDACALL(results.error, cudaMemcpy(d_Carray, &d_C, sizeof(T*), cudaMemcpyHostToDevice));
getribatched(blasHandle, n, d_Aarray, n, d_I, d_Carray, n, d_info, 1);
cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
SAFECUDACALL(results.blasStatus, getribatched(blasHandle, n, d_Aarray, n, d_I, d_Carray, n, d_info, 1));
SAFECUDACALL(results.error, cudaMemcpy(info, d_info, sizeof(int), cudaMemcpyDeviceToHost));
cublasGetMatrix(n, n, sizeof(T), d_C, n, a, n);
SAFECUDACALL(results.blasStatus, cublasGetMatrix(n, n, sizeof(T), d_C, n, a, n));
exit:
cudaFree(work);
cudaFree(d_A);
cudaFree(d_I);
cudaFree(d_C);
@ -99,46 +104,47 @@ inline int lu_inverse(cusolverDnHandle_t solverHandle, cublasHandle_t blasHandle
cudaFree(d_Aarray);
cudaFree(d_Carray);
return info;
return results;
};
template<typename T, typename GETRI>
inline int lu_inverse_factored(cublasHandle_t blasHandle, int n, T a[], int ipiv[], GETRI getri)
inline CudaResults lu_inverse_factored(cublasHandle_t blasHandle, int n, T a[], int ipiv[], GETRI getri, int *info)
{
int info = 0;
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
T* d_A = NULL;
T* d_C = NULL;
int* d_I = NULL;
int* d_info = NULL;
const T **d_Aarray = NULL;
T **d_Carray = NULL;
shift_ipiv_up(n, ipiv);
T* d_A = NULL;
cudaMalloc((void**)&d_A, n*n*sizeof(T));
cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_A, n*n*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n));
T* d_C = NULL;
cudaMalloc((void**)&d_C, n*n*sizeof(T));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_C, n*n*sizeof(T)));
int* d_I = NULL;
cudaMalloc((void**)&d_I, n*sizeof(int));
cublasSetVector(n, sizeof(int), ipiv, 1, d_I, 1);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_I, n*sizeof(int)));
SAFECUDACALL(results.blasStatus, cublasSetVector(n, sizeof(int), ipiv, 1, d_I, 1));
int* d_info = NULL;
cudaMalloc((void**)&d_info, sizeof(int));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_info, sizeof(int)));
const T **d_Aarray = NULL;
cudaMalloc((void**)&d_Aarray, sizeof(T*));
cudaMemcpy(d_Aarray, &d_A, sizeof(T*), cudaMemcpyHostToDevice);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_Aarray, sizeof(T*)));
SAFECUDACALL(results.error, cudaMemcpy(d_Aarray, &d_A, sizeof(T*), cudaMemcpyHostToDevice));
T **d_Carray = NULL;
cudaMalloc((void**)&d_Carray, sizeof(T*));
cudaMemcpy(d_Carray, &d_C, sizeof(T*), cudaMemcpyHostToDevice);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_Carray, sizeof(T*)));
SAFECUDACALL(results.error, cudaMemcpy(d_Carray, &d_C, sizeof(T*), cudaMemcpyHostToDevice));
getri(blasHandle, n, d_Aarray, n, d_I, d_Carray, n, d_info, 1);
cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
SAFECUDACALL(results.blasStatus, getri(blasHandle, n, d_Aarray, n, d_I, d_Carray, n, d_info, 1));
SAFECUDACALL(results.error, cudaMemcpy(info, d_info, sizeof(int), cudaMemcpyDeviceToHost));
cublasGetMatrix(n, n, sizeof(T), d_C, n, a, n);
cublasGetVector(n, sizeof(int), d_I, 1, ipiv, 1);
SAFECUDACALL(results.blasStatus, cublasGetMatrix(n, n, sizeof(T), d_C, n, a, n));
SAFECUDACALL(results.blasStatus, cublasGetVector(n, sizeof(int), d_I, 1, ipiv, 1));
shift_ipiv_down(n, ipiv);
exit:
cudaFree(d_A);
cudaFree(d_I);
cudaFree(d_C);
@ -146,120 +152,123 @@ inline int lu_inverse_factored(cublasHandle_t blasHandle, int n, T a[], int ipiv
cudaFree(d_Aarray);
cudaFree(d_Carray);
return info;
return results;
}
template<typename T, typename GETRS>
inline int lu_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, T a[], int ipiv[], T b[], GETRS getrs)
inline CudaResults lu_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, T a[], int ipiv[], T b[], GETRS getrs, int *info)
{
int info = 0;
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
shift_ipiv_up(n, ipiv);
T* d_A = NULL;
cudaMalloc((void**)&d_A, n*n*sizeof(T));
cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n);
T* d_B = NULL;
cudaMalloc((void**)&d_B, n*nrhs*sizeof(T));
cublasSetMatrix(n, nrhs, sizeof(T), b, n, d_B, n);
int* d_I = NULL;
cudaMalloc((void**)&d_I, n*sizeof(int));
cublasSetVector(n, sizeof(int), ipiv, 1, d_I, 1);
int* d_info = NULL;
cudaMalloc((void**)&d_info, sizeof(int));
getrs(solverHandle, CUBLAS_OP_N, n, nrhs, d_A, n, d_I, d_B, n, d_info);
cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_A, n*n*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_B, n*nrhs*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(n, nrhs, sizeof(T), b, n, d_B, n));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_I, n*sizeof(int)));
SAFECUDACALL(results.blasStatus, cublasSetVector(n, sizeof(int), ipiv, 1, d_I, 1));
cublasGetMatrix(n, nrhs, sizeof(T), d_B, n, b, n);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_info, sizeof(int)));
SAFECUDACALL(results.solverStatus, getrs(solverHandle, CUBLAS_OP_N, n, nrhs, d_A, n, d_I, d_B, n, d_info));
SAFECUDACALL(results.error, cudaMemcpy(info, d_info, sizeof(int), cudaMemcpyDeviceToHost));
SAFECUDACALL(results.blasStatus, cublasGetMatrix(n, nrhs, sizeof(T), d_B, n, b, n));
shift_ipiv_down(n, ipiv);
exit:
cudaFree(d_A);
cudaFree(d_B);
cudaFree(d_I);
cudaFree(d_info);
return info;
return results;
}
template<typename T, typename GETRF, typename GETRS, typename GETRFBSIZE>
inline int lu_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, T a[], T b[], GETRF getrf, GETRS getrs, GETRFBSIZE getrfbsize)
inline CudaResults lu_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, T a[], T b[], GETRF getrf, GETRS getrs, GETRFBSIZE getrfbsize, int *info)
{
int info = 0;
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
int* d_I = NULL;
cudaMalloc((void**)&d_I, n*sizeof(int));
T* d_A = NULL;
cudaMalloc((void**)&d_A, n*n*sizeof(T));
cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n);
T* work = NULL;
int* d_info = NULL;
T* d_B = NULL;
SAFECUDACALL(results.error, cudaMalloc((void**)&d_I, n*sizeof(int)));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_A, n*n*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n));
int lwork = 0;
getrfbsize(solverHandle, n, n, a, n, &lwork);
cudaMalloc((void**)&work, sizeof(T)*lwork);
SAFECUDACALL(results.solverStatus, getrfbsize(solverHandle, n, n, a, n, &lwork));
SAFECUDACALL(results.error, cudaMalloc((void**)&work, sizeof(T)*lwork));
int* d_info = NULL;
cudaMalloc((void**)&d_info, sizeof(int));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_info, sizeof(int)));
getrf(solverHandle, n, n, d_A, n, work, d_I, d_info);
cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
SAFECUDACALL(results.solverStatus, getrf(solverHandle, n, n, d_A, n, work, d_I, d_info));
SAFECUDACALL(results.error, cudaMemcpy(info, d_info, sizeof(int), cudaMemcpyDeviceToHost));
cudaFree(work);
if (info != 0)
if (*info != 0)
{
cudaFree(d_I);
cudaFree(d_A);
cudaFree(d_info);
return info;
return results;
}
T* d_B = NULL;
cudaMalloc((void**)&d_B, n*nrhs*sizeof(T));
cublasSetMatrix(n, nrhs, sizeof(T), b, n, d_B, n);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_B, n*nrhs*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(n, nrhs, sizeof(T), b, n, d_B, n));
getrs(solverHandle, CUBLAS_OP_N, n, nrhs, d_A, n, d_I, d_B, n, d_info);
cudaMemcpy(&info, d_info, 1, cudaMemcpyDeviceToHost);
SAFECUDACALL(results.solverStatus, getrs(solverHandle, CUBLAS_OP_N, n, nrhs, d_A, n, d_I, d_B, n, d_info));
SAFECUDACALL(results.error, cudaMemcpy(info, d_info, 1, cudaMemcpyDeviceToHost));
cublasGetMatrix(n, nrhs, sizeof(T), d_B, n, b, n);
SAFECUDACALL(results.blasStatus, cublasGetMatrix(n, nrhs, sizeof(T), d_B, n, b, n));
exit:
cudaFree(d_A);
cudaFree(d_B);
cudaFree(d_I);
cudaFree(d_info);
return info;
return results;
}
template<typename T, typename POTRF, typename POTRFBSIZE>
inline int cholesky_factor(cusolverDnHandle_t solverHandle, int n, T a[], POTRF potrf, POTRFBSIZE potrfbsize)
inline CudaResults cholesky_factor(cusolverDnHandle_t solverHandle, int n, T a[], POTRF potrf, POTRFBSIZE potrfbsize, int *info)
{
int info = 0;
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
T* d_A = NULL;
cudaMalloc((void**)&d_A, n*n*sizeof(T));
cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n);
T zero = T();
T* work = NULL;
int* d_info = NULL;
int lWork = 0;
potrfbsize(solverHandle, CUBLAS_FILL_MODE_LOWER, n, d_A, n, &lWork);
cudaMalloc((void**)&work, sizeof(T)*lWork);
int* d_info = NULL;
cudaMalloc((void**)&d_info, sizeof(int));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_A, n*n*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n));
potrf(solverHandle, CUBLAS_FILL_MODE_LOWER, n, d_A, n, work, lWork, d_info);
cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
SAFECUDACALL(results.solverStatus, potrfbsize(solverHandle, CUBLAS_FILL_MODE_LOWER, n, d_A, n, &lWork));
SAFECUDACALL(results.error, cudaMalloc((void**)&work, sizeof(T)*lWork));
cublasGetMatrix(n, n, sizeof(T), d_A, n, a, n);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_info, sizeof(int)));
T zero = T();
SAFECUDACALL(results.solverStatus, potrf(solverHandle, CUBLAS_FILL_MODE_LOWER, n, d_A, n, work, lWork, d_info));
SAFECUDACALL(results.error, cudaMemcpy(info, d_info, sizeof(int), cudaMemcpyDeviceToHost));
SAFECUDACALL(results.blasStatus, cublasGetMatrix(n, n, sizeof(T), d_A, n, a, n));
for (int i = 0; i < n; ++i)
{
@ -271,84 +280,89 @@ inline int cholesky_factor(cusolverDnHandle_t solverHandle, int n, T a[], POTRF
}
}
exit:
cudaFree(d_A);
cudaFree(d_info);
cudaFree(work);
return info;
return results;
}
template<typename T, typename POTRF, typename POTRS, typename POTRFBSIZE>
inline int cholesky_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, T a[], T b[], POTRF potrf, POTRS potrs, POTRFBSIZE potrfbsize)
inline CudaResults cholesky_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, T a[], T b[], POTRF potrf, POTRS potrs, POTRFBSIZE potrfbsize, int *info)
{
int info = 0;
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
T* d_A = NULL;
cudaMalloc((void**)&d_A, n*n*sizeof(T));
cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n);
T* work = NULL;
int* d_info = NULL;
T* d_B = NULL;
SAFECUDACALL(results.error, cudaMalloc((void**)&d_A, n*n*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n));
int lWork = 0;
potrfbsize(solverHandle, CUBLAS_FILL_MODE_LOWER, n, d_A, n, &lWork);
cudaMalloc((void**)&work, sizeof(T)*lWork);
SAFECUDACALL(results.solverStatus, potrfbsize(solverHandle, CUBLAS_FILL_MODE_LOWER, n, d_A, n, &lWork));
SAFECUDACALL(results.error, cudaMalloc((void**)&work, sizeof(T)*lWork));
int* d_info = NULL;
cudaMalloc((void**)&d_info, sizeof(int));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_info, sizeof(int)));
potrf(solverHandle, CUBLAS_FILL_MODE_LOWER, n, d_A, n, work, lWork, d_info);
cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
SAFECUDACALL(results.solverStatus, potrf(solverHandle, CUBLAS_FILL_MODE_LOWER, n, d_A, n, work, lWork, d_info));
SAFECUDACALL(results.error, cudaMemcpy(info, d_info, sizeof(int), cudaMemcpyDeviceToHost));
cudaFree(work);
if (info != 0)
if (*info != 0)
{
cudaFree(d_A);
cudaFree(d_info);
return info;
return results;
}
T* d_B = NULL;
cudaMalloc((void**)&d_B, n*nrhs*sizeof(T));
cublasSetMatrix(n, nrhs, sizeof(T), b, n, d_B, n);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_B, n*nrhs*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(n, nrhs, sizeof(T), b, n, d_B, n));
potrs(solverHandle, CUBLAS_FILL_MODE_LOWER, n, nrhs, d_A, n, d_B, n, d_info);
cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
SAFECUDACALL(results.solverStatus, potrs(solverHandle, CUBLAS_FILL_MODE_LOWER, n, nrhs, d_A, n, d_B, n, d_info));
SAFECUDACALL(results.error, cudaMemcpy(info, d_info, sizeof(int), cudaMemcpyDeviceToHost));
cublasGetMatrix(n, nrhs, sizeof(T), d_B, n, b, n);
SAFECUDACALL(results.blasStatus, cublasGetMatrix(n, nrhs, sizeof(T), d_B, n, b, n));
exit:
cudaFree(d_A);
cudaFree(d_B);
cudaFree(d_info);
return info;
return results;
}
template<typename T, typename POTRS>
inline int cholesky_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, T a[], T b[], POTRS potrs)
inline CudaResults cholesky_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, T a[], T b[], POTRS potrs, int *info)
{
int info = 0;
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
T* d_A = NULL;
cudaMalloc((void**)&d_A, n*n*sizeof(T));
cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n);
T* d_B = NULL;
cudaMalloc((void**)&d_B, n*nrhs*sizeof(T));
cublasSetMatrix(n, nrhs, sizeof(T), b, n, d_B, n);
int* d_info = NULL;
cudaMalloc((void**)&d_info, sizeof(int));
potrs(solverHandle, CUBLAS_FILL_MODE_LOWER, n, nrhs, d_A, n, d_B, n, d_info);
cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_A, n*n*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(n, n, sizeof(T), a, n, d_A, n));
cublasGetMatrix(n, nrhs, sizeof(T), d_B, n, b, n);
SAFECUDACALL(results.error, cudaMalloc((void**)&d_B, n*nrhs*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(n, nrhs, sizeof(T), b, n, d_B, n));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_info, sizeof(int)));
SAFECUDACALL(results.solverStatus, potrs(solverHandle, CUBLAS_FILL_MODE_LOWER, n, nrhs, d_A, n, d_B, n, d_info));
SAFECUDACALL(results.error, cudaMemcpy(info, d_info, sizeof(int), cudaMemcpyDeviceToHost));
SAFECUDACALL(results.blasStatus, cublasGetMatrix(n, nrhs, sizeof(T), d_B, n, b, n));
exit:
cudaFree(d_A);
cudaFree(d_B);
cudaFree(d_info);
return info;
return results;
}
//template<typename T, typename GEQRF, typename ORGQR>
@ -447,43 +461,45 @@ inline int cholesky_solve_factored(cusolverDnHandle_t solverHandle, int n, int n
//}
template<typename T, typename GESVD, typename GESVDBSIZE>
inline int svd_factor(cusolverDnHandle_t solverHandle, bool compute_vectors, int m, int n, T a[], T s[], T u[], T v[], GESVD gesvd, GESVDBSIZE gesvdbsize)
inline CudaResults svd_factor(cusolverDnHandle_t solverHandle, bool compute_vectors, int m, int n, T a[], T s[], T u[], T v[], GESVD gesvd, GESVDBSIZE gesvdbsize, int *info)
{
int info = 0;
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
int dim_s = std::min(m, n);
T* d_A = NULL;
cudaMalloc((void**)&d_A, m*n*sizeof(T));
cublasSetMatrix(m, n, sizeof(T), a, m, d_A, m);
T* d_S = NULL;
cudaMalloc((void**)&d_S, dim_s*sizeof(T));
T* d_U = NULL;
cudaMalloc((void**)&d_U, m*m*sizeof(T));
T* d_V = NULL;
cudaMalloc((void**)&d_V, n*n*sizeof(T));
T* work = NULL;
T* rwork = NULL;
int* d_info = NULL;
SAFECUDACALL(results.error, cudaMalloc((void**)&d_A, m*n*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(m, n, sizeof(T), a, m, d_A, m));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_S, dim_s*sizeof(T)));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_U, m*m*sizeof(T)));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_V, n*n*sizeof(T)));
int lWork = 0;
gesvdbsize(solverHandle, m, n, &lWork);
cudaMalloc((void**)&work, lWork*sizeof(T));
SAFECUDACALL(results.solverStatus, gesvdbsize(solverHandle, m, n, &lWork));
SAFECUDACALL(results.error, cudaMalloc((void**)&work, lWork*sizeof(T)));
T* rwork = NULL;
cudaMalloc((void**)&rwork, 5 * dim_s * sizeof(T));
SAFECUDACALL(results.error, cudaMalloc((void**)&rwork, 5 * dim_s * sizeof(T)));
int* d_info = NULL;
cudaMalloc((void**)&d_info, sizeof(int));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_info, sizeof(int)));
char job = compute_vectors ? 'A' : 'N';
gesvd(solverHandle, job, job, m, n, d_A, m, d_S, d_U, m, d_V, n, work, lWork, rwork, d_info);
cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
SAFECUDACALL(results.solverStatus, gesvd(solverHandle, job, job, m, n, d_A, m, d_S, d_U, m, d_V, n, work, lWork, rwork, d_info));
SAFECUDACALL(results.error, cudaMemcpy(info, d_info, sizeof(int), cudaMemcpyDeviceToHost));
cublasGetVector(dim_s, sizeof(T), d_S, 1, s, 1);
cublasGetMatrix(m, m, sizeof(T), d_U, m, u, m);
cublasGetMatrix(n, n, sizeof(T), d_V, n, v, n);
SAFECUDACALL(results.blasStatus, cublasGetVector(dim_s, sizeof(T), d_S, 1, s, 1));
SAFECUDACALL(results.blasStatus, cublasGetMatrix(m, m, sizeof(T), d_U, m, u, m));
SAFECUDACALL(results.blasStatus, cublasGetMatrix(n, n, sizeof(T), d_V, n, v, n));
exit:
cudaFree(d_A);
cudaFree(d_S);
cudaFree(d_U);
@ -492,53 +508,55 @@ inline int svd_factor(cusolverDnHandle_t solverHandle, bool compute_vectors, int
cudaFree(rwork);
cudaFree(d_info);
return info;
return results;
}
template<typename T, typename R, typename GESVD, typename GESVDBSIZE>
inline int complex_svd_factor(cusolverDnHandle_t solverHandle, bool compute_vectors, int m, int n, T a[], T s[], T u[], T v[], GESVD gesvd, GESVDBSIZE gesvdbsize)
inline CudaResults complex_svd_factor(cusolverDnHandle_t solverHandle, bool compute_vectors, int m, int n, T a[], T s[], T u[], T v[], GESVD gesvd, GESVDBSIZE gesvdbsize, int *info)
{
int info = 0;
CudaResults results = { cudaError_t::cudaSuccess, cublasStatus_t::CUBLAS_STATUS_SUCCESS, cusolverStatus_t::CUSOLVER_STATUS_SUCCESS };
int dim_s = std::min(m, n);
T* d_A = NULL;
cudaMalloc((void**)&d_A, m*n*sizeof(T));
cublasSetMatrix(m, n, sizeof(T), a, m, d_A, m);
R* s_local = new R[dim_s];
R* d_S = NULL;
cudaMalloc((void**)&d_S, dim_s*sizeof(R));
T* d_U = NULL;
cudaMalloc((void**)&d_U, m*m*sizeof(T));
T* d_V = NULL;
cudaMalloc((void**)&d_V, n*m*sizeof(T));
T* work = NULL;
R* rwork = NULL;
int* d_info = NULL;
SAFECUDACALL(results.error, cudaMalloc((void**)&d_A, m*n*sizeof(T)));
SAFECUDACALL(results.blasStatus, cublasSetMatrix(m, n, sizeof(T), a, m, d_A, m));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_S, dim_s*sizeof(R)));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_U, m*m*sizeof(T)));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_V, n*m*sizeof(T)));
int lWork = 0;
gesvdbsize(solverHandle, m, n, &lWork);
cudaMalloc((void**)&work, lWork*sizeof(T));
SAFECUDACALL(results.solverStatus, gesvdbsize(solverHandle, m, n, &lWork));
SAFECUDACALL(results.error, cudaMalloc((void**)&work, lWork*sizeof(T)));
R* rwork = NULL;
cudaMalloc((void**)&rwork, 5 * dim_s * sizeof(R));
SAFECUDACALL(results.error, cudaMalloc((void**)&rwork, 5 * dim_s * sizeof(R)));
int* d_info = NULL;
cudaMalloc((void**)&d_info, sizeof(int));
SAFECUDACALL(results.error, cudaMalloc((void**)&d_info, sizeof(int)));
char job = compute_vectors ? 'A' : 'N';
gesvd(solverHandle, job, job, m, n, d_A, m, d_S, d_U, m, d_V, n, work, lWork, rwork, d_info);
cudaMemcpy(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost);
SAFECUDACALL(results.solverStatus, gesvd(solverHandle, job, job, m, n, d_A, m, d_S, d_U, m, d_V, n, work, lWork, rwork, d_info));
SAFECUDACALL(results.error, cudaMemcpy(info, d_info, sizeof(int), cudaMemcpyDeviceToHost));
cublasGetVector(dim_s, sizeof(R), d_S, 1, s_local, 1);
cublasGetMatrix(m, m, sizeof(T), d_U, m, u, m);
cublasGetMatrix(n, n, sizeof(T), d_V, n, v, n);
SAFECUDACALL(results.blasStatus, cublasGetVector(dim_s, sizeof(R), d_S, 1, s_local, 1));
SAFECUDACALL(results.blasStatus, cublasGetMatrix(m, m, sizeof(T), d_U, m, u, m));
SAFECUDACALL(results.blasStatus, cublasGetMatrix(n, n, sizeof(T), d_V, n, v, n));
for (int index = 0; index < dim_s; ++index)
{
s[index].x = s_local[index];
}
exit:
delete[] s_local;
cudaFree(d_A);
cudaFree(d_S);
@ -548,7 +566,7 @@ inline int complex_svd_factor(cusolverDnHandle_t solverHandle, bool compute_vect
cudaFree(rwork);
cudaFree(d_info);
return info;
return results;
}
//template<typename T, typename R, typename GEES, typename TREVC>
@ -730,164 +748,164 @@ inline int complex_svd_factor(cusolverDnHandle_t solverHandle, bool compute_vect
extern "C" {
DLLEXPORT int s_lu_factor(cusolverDnHandle_t solverHandle, int m, float a[], int ipiv[])
DLLEXPORT CudaResults s_lu_factor(cusolverDnHandle_t solverHandle, int m, float a[], int ipiv[], int *info)
{
return lu_factor(solverHandle, m, a, ipiv, sgetrf, sgetrfbsize);
return lu_factor(solverHandle, m, a, ipiv, sgetrf, sgetrfbsize, info);
}
DLLEXPORT int d_lu_factor(cusolverDnHandle_t solverHandle, int m, double a[], int ipiv[])
DLLEXPORT CudaResults d_lu_factor(cusolverDnHandle_t solverHandle, int m, double a[], int ipiv[], int *info)
{
return lu_factor(solverHandle, m, a, ipiv, dgetrf, dgetrfbsize);
return lu_factor(solverHandle, m, a, ipiv, dgetrf, dgetrfbsize, info);
}
DLLEXPORT int c_lu_factor(cusolverDnHandle_t solverHandle, int m, cuComplex a[], int ipiv[])
DLLEXPORT CudaResults c_lu_factor(cusolverDnHandle_t solverHandle, int m, cuComplex a[], int ipiv[], int *info)
{
return lu_factor(solverHandle, m, a, ipiv, cgetrf, cgetrfbsize);
return lu_factor(solverHandle, m, a, ipiv, cgetrf, cgetrfbsize, info);
}
DLLEXPORT int z_lu_factor(cusolverDnHandle_t solverHandle, int m, cuDoubleComplex a[], int ipiv[])
DLLEXPORT CudaResults z_lu_factor(cusolverDnHandle_t solverHandle, int m, cuDoubleComplex a[], int ipiv[], int *info)
{
return lu_factor(solverHandle, m, a, ipiv, zgetrf, zgetrfbsize);
return lu_factor(solverHandle, m, a, ipiv, zgetrf, zgetrfbsize, info);
}
DLLEXPORT int s_lu_inverse(cusolverDnHandle_t solverHandle, cublasHandle_t blasHandle, int n, float a[])
DLLEXPORT CudaResults s_lu_inverse(cusolverDnHandle_t solverHandle, cublasHandle_t blasHandle, int n, float a[], int *info)
{
return lu_inverse(solverHandle, blasHandle, n, a, sgetrf, sgetribatched, sgetrfbsize);
return lu_inverse(solverHandle, blasHandle, n, a, sgetrf, sgetribatched, sgetrfbsize, info);
}
DLLEXPORT int d_lu_inverse(cusolverDnHandle_t solverHandle, cublasHandle_t blasHandle, int n, double a[])
DLLEXPORT CudaResults d_lu_inverse(cusolverDnHandle_t solverHandle, cublasHandle_t blasHandle, int n, double a[], int *info)
{
return lu_inverse(solverHandle, blasHandle, n, a, dgetrf, dgetribatched, dgetrfbsize);
return lu_inverse(solverHandle, blasHandle, n, a, dgetrf, dgetribatched, dgetrfbsize, info);
}
DLLEXPORT int c_lu_inverse(cusolverDnHandle_t solverHandle, cublasHandle_t blasHandle, int n, cuComplex a[])
DLLEXPORT CudaResults c_lu_inverse(cusolverDnHandle_t solverHandle, cublasHandle_t blasHandle, int n, cuComplex a[], int *info)
{
return lu_inverse(solverHandle, blasHandle, n, a, cgetrf, cgetribatched, cgetrfbsize);
return lu_inverse(solverHandle, blasHandle, n, a, cgetrf, cgetribatched, cgetrfbsize, info);
}
DLLEXPORT int z_lu_inverse(cusolverDnHandle_t solverHandle, cublasHandle_t blasHandle, int n, cuDoubleComplex a[])
DLLEXPORT CudaResults z_lu_inverse(cusolverDnHandle_t solverHandle, cublasHandle_t blasHandle, int n, cuDoubleComplex a[], int *info)
{
return lu_inverse(solverHandle, blasHandle, n, a, zgetrf, zgetribatched, zgetrfbsize);
return lu_inverse(solverHandle, blasHandle, n, a, zgetrf, zgetribatched, zgetrfbsize, info);
}
DLLEXPORT int s_lu_inverse_factored(cublasHandle_t blasHandle, int n, float a[], int ipiv[])
DLLEXPORT CudaResults s_lu_inverse_factored(cublasHandle_t blasHandle, int n, float a[], int ipiv[], int *info)
{
return lu_inverse_factored(blasHandle, n, a, ipiv, sgetribatched);
return lu_inverse_factored(blasHandle, n, a, ipiv, sgetribatched, info);
}
DLLEXPORT int d_lu_inverse_factored(cublasHandle_t blasHandle, int n, double a[], int ipiv[])
DLLEXPORT CudaResults d_lu_inverse_factored(cublasHandle_t blasHandle, int n, double a[], int ipiv[], int *info)
{
return lu_inverse_factored(blasHandle, n, a, ipiv, dgetribatched);
return lu_inverse_factored(blasHandle, n, a, ipiv, dgetribatched, info);
}
DLLEXPORT int c_lu_inverse_factored(cublasHandle_t blasHandle, int n, cuComplex a[], int ipiv[])
DLLEXPORT CudaResults c_lu_inverse_factored(cublasHandle_t blasHandle, int n, cuComplex a[], int ipiv[], int *info)
{
return lu_inverse_factored(blasHandle, n, a, ipiv, cgetribatched);
return lu_inverse_factored(blasHandle, n, a, ipiv, cgetribatched, info);
}
DLLEXPORT int z_lu_inverse_factored(cublasHandle_t blasHandle, int n, cuDoubleComplex a[], int ipiv[])
DLLEXPORT CudaResults z_lu_inverse_factored(cublasHandle_t blasHandle, int n, cuDoubleComplex a[], int ipiv[], int *info)
{
return lu_inverse_factored(blasHandle, n, a, ipiv, zgetribatched);
return lu_inverse_factored(blasHandle, n, a, ipiv, zgetribatched, info);
}
DLLEXPORT int s_lu_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, float a[], int ipiv[], float b[])
DLLEXPORT CudaResults s_lu_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, float a[], int ipiv[], float b[], int *info)
{
return lu_solve_factored(solverHandle, n, nrhs, a, ipiv, b, sgetrs);
return lu_solve_factored(solverHandle, n, nrhs, a, ipiv, b, sgetrs, info);
}
DLLEXPORT int d_lu_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, double a[], int ipiv[], double b[])
DLLEXPORT CudaResults d_lu_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, double a[], int ipiv[], double b[], int *info)
{
return lu_solve_factored(solverHandle, n, nrhs, a, ipiv, b, dgetrs);
return lu_solve_factored(solverHandle, n, nrhs, a, ipiv, b, dgetrs, info);
}
DLLEXPORT int c_lu_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, cuComplex a[], int ipiv[], cuComplex b[])
DLLEXPORT CudaResults c_lu_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, cuComplex a[], int ipiv[], cuComplex b[], int *info)
{
return lu_solve_factored(solverHandle, n, nrhs, a, ipiv, b, cgetrs);
return lu_solve_factored(solverHandle, n, nrhs, a, ipiv, b, cgetrs, info);
}
DLLEXPORT int z_lu_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, cuDoubleComplex a[], int ipiv[], cuDoubleComplex b[])
DLLEXPORT CudaResults z_lu_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, cuDoubleComplex a[], int ipiv[], cuDoubleComplex b[], int *info)
{
return lu_solve_factored(solverHandle, n, nrhs, a, ipiv, b, zgetrs);
return lu_solve_factored(solverHandle, n, nrhs, a, ipiv, b, zgetrs, info);
}
DLLEXPORT int s_lu_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, float a[], float b[])
DLLEXPORT CudaResults s_lu_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, float a[], float b[], int *info)
{
return lu_solve(solverHandle, n, nrhs, a, b, sgetrf, sgetrs, sgetrfbsize);
return lu_solve(solverHandle, n, nrhs, a, b, sgetrf, sgetrs, sgetrfbsize, info);
}
DLLEXPORT int d_lu_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, double a[], double b[])
DLLEXPORT CudaResults d_lu_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, double a[], double b[], int *info)
{
return lu_solve(solverHandle, n, nrhs, a, b, dgetrf, dgetrs, dgetrfbsize);
return lu_solve(solverHandle, n, nrhs, a, b, dgetrf, dgetrs, dgetrfbsize, info);
}
DLLEXPORT int c_lu_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, cuComplex a[], cuComplex b[])
DLLEXPORT CudaResults c_lu_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, cuComplex a[], cuComplex b[], int *info)
{
return lu_solve(solverHandle, n, nrhs, a, b, cgetrf, cgetrs, cgetrfbsize);
return lu_solve(solverHandle, n, nrhs, a, b, cgetrf, cgetrs, cgetrfbsize, info);
}
DLLEXPORT int z_lu_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, cuDoubleComplex a[], cuDoubleComplex b[])
DLLEXPORT CudaResults z_lu_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, cuDoubleComplex a[], cuDoubleComplex b[], int *info)
{
return lu_solve(solverHandle, n, nrhs, a, b, zgetrf, zgetrs, zgetrfbsize);
return lu_solve(solverHandle, n, nrhs, a, b, zgetrf, zgetrs, zgetrfbsize, info);
}
DLLEXPORT int s_cholesky_factor(cusolverDnHandle_t solverHandle, int n, float a[])
DLLEXPORT CudaResults s_cholesky_factor(cusolverDnHandle_t solverHandle, int n, float a[], int *info)
{
return cholesky_factor(solverHandle, n, a, spotrf, spotrfbsize);
return cholesky_factor(solverHandle, n, a, spotrf, spotrfbsize, info);
}
DLLEXPORT int d_cholesky_factor(cusolverDnHandle_t solverHandle, int n, double a[])
DLLEXPORT CudaResults d_cholesky_factor(cusolverDnHandle_t solverHandle, int n, double a[], int *info)
{
return cholesky_factor(solverHandle, n, a, dpotrf, dpotrfbsize);
return cholesky_factor(solverHandle, n, a, dpotrf, dpotrfbsize, info);
}
DLLEXPORT int c_cholesky_factor(cusolverDnHandle_t solverHandle, int n, cuComplex a[])
DLLEXPORT CudaResults c_cholesky_factor(cusolverDnHandle_t solverHandle, int n, cuComplex a[], int *info)
{
return cholesky_factor(solverHandle, n, a, cpotrf, cpotrfbsize);
return cholesky_factor(solverHandle, n, a, cpotrf, cpotrfbsize, info);
}
DLLEXPORT int z_cholesky_factor(cusolverDnHandle_t solverHandle, int n, cuDoubleComplex a[])
DLLEXPORT CudaResults z_cholesky_factor(cusolverDnHandle_t solverHandle, int n, cuDoubleComplex a[], int *info)
{
return cholesky_factor(solverHandle, n, a, zpotrf, zpotrfbsize);
return cholesky_factor(solverHandle, n, a, zpotrf, zpotrfbsize, info);
}
DLLEXPORT int s_cholesky_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, float a[], float b[])
DLLEXPORT CudaResults s_cholesky_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, float a[], float b[], int *info)
{
return cholesky_solve(solverHandle, n, nrhs, a, b, spotrf, spotrs, spotrfbsize);
return cholesky_solve(solverHandle, n, nrhs, a, b, spotrf, spotrs, spotrfbsize, info);
}
DLLEXPORT int d_cholesky_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, double a[], double b[])
DLLEXPORT CudaResults d_cholesky_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, double a[], double b[], int *info)
{
return cholesky_solve(solverHandle, n, nrhs, a, b, dpotrf, dpotrs, dpotrfbsize);
return cholesky_solve(solverHandle, n, nrhs, a, b, dpotrf, dpotrs, dpotrfbsize, info);
}
DLLEXPORT int c_cholesky_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, cuComplex a[], cuComplex b[])
DLLEXPORT CudaResults c_cholesky_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, cuComplex a[], cuComplex b[], int *info)
{
return cholesky_solve(solverHandle, n, nrhs, a, b, cpotrf, cpotrs, cpotrfbsize);
return cholesky_solve(solverHandle, n, nrhs, a, b, cpotrf, cpotrs, cpotrfbsize, info);
}
DLLEXPORT int z_cholesky_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, cuDoubleComplex a[], cuDoubleComplex b[])
DLLEXPORT CudaResults z_cholesky_solve(cusolverDnHandle_t solverHandle, int n, int nrhs, cuDoubleComplex a[], cuDoubleComplex b[], int *info)
{
return cholesky_solve(solverHandle, n, nrhs, a, b, zpotrf, zpotrs, zpotrfbsize);
return cholesky_solve(solverHandle, n, nrhs, a, b, zpotrf, zpotrs, zpotrfbsize, info);
}
DLLEXPORT int s_cholesky_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, float a[], float b[])
DLLEXPORT CudaResults s_cholesky_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, float a[], float b[], int *info)
{
return cholesky_solve_factored(solverHandle, n, nrhs, a, b, spotrs);
return cholesky_solve_factored(solverHandle, n, nrhs, a, b, spotrs, info);
}
DLLEXPORT int d_cholesky_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, double a[], double b[])
DLLEXPORT CudaResults d_cholesky_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, double a[], double b[], int *info)
{
return cholesky_solve_factored(solverHandle, n, nrhs, a, b, dpotrs);
return cholesky_solve_factored(solverHandle, n, nrhs, a, b, dpotrs, info);
}
DLLEXPORT int c_cholesky_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, cuComplex a[], cuComplex b[])
DLLEXPORT CudaResults c_cholesky_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, cuComplex a[], cuComplex b[], int *info)
{
return cholesky_solve_factored(solverHandle, n, nrhs, a, b, cpotrs);
return cholesky_solve_factored(solverHandle, n, nrhs, a, b, cpotrs, info);
}
DLLEXPORT int z_cholesky_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, cuDoubleComplex a[], cuDoubleComplex b[])
DLLEXPORT CudaResults z_cholesky_solve_factored(cusolverDnHandle_t solverHandle, int n, int nrhs, cuDoubleComplex a[], cuDoubleComplex b[], int *info)
{
return cholesky_solve_factored(solverHandle, n, nrhs, a, b, zpotrs);
return cholesky_solve_factored(solverHandle, n, nrhs, a, b, zpotrs, info);
}
// MJ: I am fairly certain that it would be straightforward to implement ?orgqr and ?gels but I'm focusing on getting the low-hanging fruit working first
@ -971,24 +989,24 @@ extern "C" {
// return complex_qr_solve_factored(m, n, bn, r, b, tau, x, work, len, zunmqr, cublasZtrsm);
//}
DLLEXPORT int s_svd_factor(cusolverDnHandle_t solverHandle, bool compute_vectors, int m, int n, float a[], float s[], float u[], float v[])
DLLEXPORT CudaResults s_svd_factor(cusolverDnHandle_t solverHandle, bool compute_vectors, int m, int n, float a[], float s[], float u[], float v[], int *info)
{
return svd_factor(solverHandle, compute_vectors, m, n, a, s, u, v, sgesvd, sgesvdbsize);
return svd_factor(solverHandle, compute_vectors, m, n, a, s, u, v, sgesvd, sgesvdbsize, info);
}
DLLEXPORT int d_svd_factor(cusolverDnHandle_t solverHandle, bool compute_vectors, int m, int n, double a[], double s[], double u[], double v[])
DLLEXPORT CudaResults d_svd_factor(cusolverDnHandle_t solverHandle, bool compute_vectors, int m, int n, double a[], double s[], double u[], double v[], int *info)
{
return svd_factor(solverHandle, compute_vectors, m, n, a, s, u, v,dgesvd, dgesvdbsize);
return svd_factor(solverHandle, compute_vectors, m, n, a, s, u, v, dgesvd, dgesvdbsize, info);
}
DLLEXPORT int c_svd_factor(cusolverDnHandle_t solverHandle, bool compute_vectors, int m, int n, cuComplex a[], cuComplex s[], cuComplex u[], cuComplex v[])
DLLEXPORT CudaResults c_svd_factor(cusolverDnHandle_t solverHandle, bool compute_vectors, int m, int n, cuComplex a[], cuComplex s[], cuComplex u[], cuComplex v[], int *info)
{
return complex_svd_factor<cuComplex, float>(solverHandle, compute_vectors, m, n, a, s, u, v, cgesvd, cgesvdbsize);
return complex_svd_factor<cuComplex, float>(solverHandle, compute_vectors, m, n, a, s, u, v, cgesvd, cgesvdbsize, info);
}
DLLEXPORT int z_svd_factor(cusolverDnHandle_t solverHandle, bool compute_vectors, int m, int n, cuDoubleComplex a[], cuDoubleComplex s[], cuDoubleComplex u[], cuDoubleComplex v[])
DLLEXPORT CudaResults z_svd_factor(cusolverDnHandle_t solverHandle, bool compute_vectors, int m, int n, cuDoubleComplex a[], cuDoubleComplex s[], cuDoubleComplex u[], cuDoubleComplex v[], int *info)
{
return complex_svd_factor<cuDoubleComplex, double>(solverHandle, compute_vectors, m, n, a, s, u, v, zgesvd, zgesvdbsize);
return complex_svd_factor<cuDoubleComplex, double>(solverHandle, compute_vectors, m, n, a, s, u, v, zgesvd, zgesvdbsize, info);
}
/*DLLEXPORT int s_eigen(bool isSymmetric, int n, float a[], float vectors[], cuDoubleComplex values[], float d[])

0
src/NativeProviders/CUDA/memory.c

18
src/NativeProviders/CUDA/wrapper_cuda.h

@ -0,0 +1,18 @@
#ifndef WRAPPER_CUDA_H
#define WRAPPER_CUDA_H
#include "wrapper_common.h"
#include "cuda_runtime.h"
#include "cublas_v2.h"
#include "cusolver_common.h"
#define SAFECUDACALL(error,call) {error = call; if(error){goto exit;}}
typedef struct
{
cudaError_t error;
cublasStatus_t blasStatus;
cusolverStatus_t solverStatus;
} CudaResults;
#endif

1
src/NativeProviders/Windows/CUDA/CUDAWrapper.vcxproj

@ -29,6 +29,7 @@
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\CUDA\resource.h" />
<ClInclude Include="..\..\CUDA\wrapper_cuda.h" />
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{5A52B796-7F41-4C90-8DE2-F3F391C4482C}</ProjectGuid>

3
src/NativeProviders/Windows/CUDA/CUDAWrapper.vcxproj.filters

@ -37,5 +37,8 @@
<ClInclude Include="..\..\CUDA\resource.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\CUDA\wrapper_cuda.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>

4
src/Numerics/Control.cs

@ -261,6 +261,10 @@ namespace MathNet.Numerics
{
value.InitializeVerify();
// dispose the previous value if necessary
if (_linearAlgebraProvider != null && _linearAlgebraProvider is IDisposable)
(_linearAlgebraProvider as IDisposable).Dispose();
// only actually set if verification did not throw
_linearAlgebraProvider = value;
}

4
src/Numerics/Numerics.csproj

@ -159,12 +159,16 @@
<Compile Include="Providers\LinearAlgebra\Acml\AcmlLinearAlgebraProvider.Double.cs" />
<Compile Include="Providers\LinearAlgebra\Acml\AcmlLinearAlgebraProvider.Single.cs" />
<Compile Include="Providers\LinearAlgebra\Acml\SafeNativeMethods.cs" />
<Compile Include="Providers\LinearAlgebra\Cuda\CuBLASException.cs" />
<Compile Include="Providers\LinearAlgebra\Cuda\CudaException.cs" />
<Compile Include="Providers\LinearAlgebra\Cuda\CudaLinearAlgebraProvider.Complex.cs" />
<Compile Include="Providers\LinearAlgebra\Cuda\CudaLinearAlgebraProvider.Complex32.cs" />
<Compile Include="Providers\LinearAlgebra\Cuda\CudaLinearAlgebraProvider.cs" />
<Compile Include="Providers\LinearAlgebra\Cuda\CudaLinearAlgebraProvider.Double.cs" />
<Compile Include="Providers\LinearAlgebra\Cuda\CudaLinearAlgebraProvider.Single.cs" />
<Compile Include="Providers\LinearAlgebra\Cuda\CudaProviderCapabilities.cs" />
<Compile Include="Providers\LinearAlgebra\Cuda\CudaResults.cs" />
<Compile Include="Providers\LinearAlgebra\Cuda\CuSolverException.cs" />
<Compile Include="Providers\LinearAlgebra\Cuda\SafeNativeMethods.cs" />
<Compile Include="Providers\LinearAlgebra\Mkl\MklProviderCapabilities.cs" />
<Compile Include="Providers\LinearAlgebra\OpenBlas\OpenBlasLinearAlgebraProvider.cs" />

110
src/Numerics/Providers/LinearAlgebra/Cuda/CuSolverException.cs

@ -0,0 +1,110 @@
// <copyright file="CuSolverException.cs" company="Math.NET">
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
// http://mathnetnumerics.codeplex.com
//
// Copyright (c) 2009-2013 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// </copyright>
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
{
/// <summary>
/// Exceptions thrown by the cuSolverDn API.
/// </summary>
public class CuSolverException : Exception
{
/// <summary>
/// Initializes a new instance of the <see cref="CuSolverException"/> class.
/// </summary>
/// <param name="statusCode">The status code returned from the API</param>
public CuSolverException(int statusCode)
: base(CuSolverException.GetErrorMessage(statusCode))
{
this.StatusCode = statusCode;
}
/// <summary>
/// Gets the status code returned by the cuSolverDn API
/// </summary>
public int StatusCode { get; private set; }
/// <summary>
/// Returns the appropriate error message for each status code.
/// </summary>
/// <param name="statusCode">The status code returned from the API</param>
/// <returns>The corresponding error message</returns>
private static string GetErrorMessage(int statusCode)
{
switch (statusCode)
{
case 0: // CUSOLVER_STATUS_SUCCESS
return "The operation completed successfully.";
case 1: // CUSOLVER_STATUS_NOT_INITIALIZED
return "The cuSolver library was not initialized. This is usually caused by the lack of a prior call, an error in the CUDA Runtime API called by the cuSolver routine, or an error in the hardware setup.";
case 2: // CUSOLVER_STATUS_ALLOC_FAILED
return "Resource allocation failed inside the cuSolver library. This is usually caused by a cudaMalloc() failure.";
case 3: // CUSOLVER_STATUS_INVALID_VALUE
return "An unsupported value or parameter was passed to the function (a negative vector size, for example).";
case 4: // CUSOLVER_STATUS_ARCH_MISMATCH
return "The function requires a feature absent from the device architecture; usually caused by the lack of support for atomic operations or double precision.";
case 5: // CUSOLVER_STATUS_MAPPING_ERROR
return "Mapping Error";
case 6: // CUSOLVER_STATUS_EXECUTION_FAILED
return "The GPU program failed to execute. This is often caused by a launch failure of the kernel on the GPU, which can be caused by multiple reasons.";
case 7: //CUSOLVER_STATUS_INTERNAL_ERROR
return "An internal cuSolver operation failed. This error is usually caused by a cudaMemcpyAsync() failure.";
case 8: // CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED
return "The matrix type is not supported by this function. This is usually caused by passing an invalid matrix descriptor to the function.";
case 9: // CUSOLVER_STATUS_NOT_SUPPORTED
return "The functionality requested is not supported";
case 10: // CUSOLVER_STATUS_ZERO_PIVOT
return "Zero Pivot";
case 11: //CUSOLVER_STATUS_INVALID_LICENSE
return "The functionality requested requires some license and an error was detected when trying to check the current licensing. This error can happen if the license is not present or is expired or if the environment variable NVIDIA_LICENSE_FILE is not set properly.";
default:
throw new Exception("Unrecognized cuSolverDn status code");
}
}
}
}

102
src/Numerics/Providers/LinearAlgebra/Cuda/CublasException.cs

@ -0,0 +1,102 @@
// <copyright file="CuBLASException.cs" company="Math.NET">
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
// http://mathnetnumerics.codeplex.com
//
// Copyright (c) 2009-2013 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// </copyright>
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
{
/// <summary>
/// Exceptions thrown by the cuBLAS api.
/// </summary>
public class CuBLASException : Exception
{
/// <summary>
/// Initializes a new instance of the <see cref="CuBLASException"/> class.
/// </summary>
/// <param name="statusCode">The status code returned from the API</param>
public CuBLASException(int statusCode)
: base(CuBLASException.GetErrorMessage(statusCode))
{
this.StatusCode = statusCode;
}
/// <summary>
/// Gets the status code returned by the cuBLAS API.
/// </summary>
public int StatusCode { get; private set; }
/// <summary>
/// Returns the appropriate error message for each status code.
/// </summary>
/// <param name="statusCode">The status code returned from the API</param>
/// <returns>The corresponding error message</returns>
private static string GetErrorMessage(int statusCode)
{
switch (statusCode)
{
case 0: // CUBLAS_STATUS_SUCCESS
return "The operation completed successfully.";
case 1: // CUBLAS_STATUS_NOT_INITIALIZED
return "The cuBLAS library was not initialized. This is usually caused by the lack of a prior cublasCreate() call, an error in the CUDA Runtime API called by the cuBLAS routine, or an error in the hardware setup.";
case 2: // CUSOLVER_STATUS_ALLOC_FAILED
return "Resource allocation failed inside the cuBLAS library. This is usually caused by a cudaMalloc() failure.";
case 7: // CUBLAS_STATUS_INVALID_VALUE
return "An unsupported value or parameter was passed to the function (a negative vector size, for example).";
case 8: // CUBLAS_STATUS_ARCH_MISMATCH
return "The function requires a feature absent from the device architecture; usually caused by the lack of support for double precision.";
case 11: // CUBLAS_STATUS_MAPPING_ERROR
return "An access to GPU memory space failed, which is usually caused by a failure to bind a texture.";
case 13: // CUBLAS_STATUS_EXECUTION_FAILED
return "The GPU program failed to execute. This is often caused by a launch failure of the kernel on the GPU, which can be caused by multiple reasons.";
case 14: // CUBLAS_STATUS_INTERNAL_ERROR
return "An internal cuBLAS operation failed. This error is usually caused by a cudaMemcpyAsync() failure.";
case 15: // CUBLAS_STATUS_NOT_SUPPORTED
return "The functionality requested is not supported";
case 16: // CUBLAS_STATUS_LICENSE_ERROR
return "The functionality requested requires some license and an error was detected when trying to check the current licensing. This error can happen if the license is not present or is expired or if the environment variable NVIDIA_LICENSE_FILE is not set properly.";
default:
return "Unrecognized cuBLAS status code";
}
}
}
}

90
src/Numerics/Providers/LinearAlgebra/Cuda/CudaException.cs

@ -0,0 +1,90 @@
// <copyright file="CudaException.cs" company="Math.NET">
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
// http://mathnetnumerics.codeplex.com
//
// Copyright (c) 2009-2013 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// </copyright>
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
{
/// <summary>
/// Exception thrown by the Cuda Runtime API
/// </summary>
public class CudaException : Exception
{
/// <summary>
/// Initializes a new instance of the <see cref="CudaException"/> class.
/// </summary>
/// <param name="errorCode">The error code returned by the API</param>
public CudaException(int errorCode)
: base(CudaException.GetErrorMessage(errorCode))
{
this.ErrorCode = errorCode;
}
/// <summary>
/// Gets the error code returned by the Cuda Runtime API.
/// </summary>
public int ErrorCode { get; private set; }
/// <summary>
/// Gets the error message for a particular error code.
/// </summary>
/// <param name="errorCode">The error code returned by the API</param>
/// <returns>The corresponding error message</returns>
private static string GetErrorMessage(int errorCode)
{
switch (errorCode)
{
case 0: // cudaSuccess
return "The API call returned with no errors.";
case 2: // cudaErrorMemoryAllocation
return "The API call failed because it was unable to allocate enough memory to perform the requested operation.";
case 3: // cudaErrorInitializationError
return "The API call failed because the CUDA driver and runtime could not be initialized.";
case 11: // cudaErrorInvalidValue
return "This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values.";
case 17: // cudaErrorInvalidDevicePointer
return "This indicates that at least one device pointer passed to the API call is not a valid device pointer. ";
case 21: // cudaErrorInvalidMemcpyDirection
return "This indicates that the direction of the memcpy passed to the API call is not one of the types specified by cudaMemcpyKind. ";
default:
return "Unknown Cuda Runtime error code";
}
}
}
}

53
src/Numerics/Providers/LinearAlgebra/Cuda/CudaLinearAlgebraProvider.Complex.cs

@ -68,7 +68,9 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength);
}
return SafeNativeMethods.z_dot_product(_blasHandle, x.Length, x, y);
Complex result = 0;
HandleResults(SafeNativeMethods.z_dot_product(_blasHandle, x.Length, x, y, ref result));
return result;
}
/// <summary>
@ -107,7 +109,7 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
return;
}
SafeNativeMethods.z_axpy(_blasHandle, y.Length, alpha, x, result);
HandleResults(SafeNativeMethods.z_axpy(_blasHandle, y.Length, alpha, x, result));
}
/// <summary>
@ -135,7 +137,7 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
return;
}
SafeNativeMethods.z_scale(_blasHandle, x.Length, alpha, result);
HandleResults(SafeNativeMethods.z_scale(_blasHandle, x.Length, alpha, result));
}
/// <summary>
@ -202,7 +204,7 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentMatrixDimensions);
}
SafeNativeMethods.z_matrix_multiply(_blasHandle, transposeA.ToCUDA(), transposeB.ToCUDA(), m, n, k, alpha, a, b, beta, c);
HandleResults(SafeNativeMethods.z_matrix_multiply(_blasHandle, transposeA.ToCUDA(), transposeB.ToCUDA(), m, n, k, alpha, a, b, beta, c));
}
/// <summary>
@ -237,7 +239,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "ipiv");
}
Solver(SafeNativeMethods.z_lu_factor(_solverHandle, order, data, ipiv));
int info = 0;
HandleResults(SafeNativeMethods.z_lu_factor(_solverHandle, order, data, ipiv, ref info));
}
/// <summary>
@ -259,7 +262,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "a");
}
Solver(SafeNativeMethods.z_lu_inverse(_solverHandle, _blasHandle, order, a));
int info = 0;
HandleResults(SafeNativeMethods.z_lu_inverse(_solverHandle, _blasHandle, order, a, ref info));
}
/// <summary>
@ -292,7 +296,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "ipiv");
}
BLAS(SafeNativeMethods.z_lu_inverse_factored(_blasHandle, order, a, ipiv));
int info = 0;
HandleResults(SafeNativeMethods.z_lu_inverse_factored(_blasHandle, order, a, ipiv, ref info));
}
/// <summary>
@ -320,7 +325,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.UserWorkBufferNotSupported);
}
Solver(SafeNativeMethods.z_lu_inverse(_solverHandle, _blasHandle, order, a));
int info = 0;
HandleResults(SafeNativeMethods.z_lu_inverse(_solverHandle, _blasHandle, order, a, ref info));
}
/// <summary>
@ -359,7 +365,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.UserWorkBufferNotSupported);
}
BLAS(SafeNativeMethods.z_lu_inverse_factored(_blasHandle, order, a, ipiv));
int info = 0;
HandleResults(SafeNativeMethods.z_lu_inverse_factored(_blasHandle, order, a, ipiv, ref info));
}
/// <summary>
@ -393,7 +400,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.z_lu_solve(_solverHandle, order, columnsOfB, a, b));
int info = 0;
HandleResults(SafeNativeMethods.z_lu_solve(_solverHandle, order, columnsOfB, a, b, ref info));
}
/// <summary>
@ -438,7 +446,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.z_lu_solve_factored(_solverHandle, order, columnsOfB, a, ipiv, b));
int info = 0;
HandleResults(SafeNativeMethods.z_lu_solve_factored(_solverHandle, order, columnsOfB, a, ipiv, b, ref info));
}
/// <summary>
@ -466,7 +475,13 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "a");
}
Solver(SafeNativeMethods.z_cholesky_factor(_solverHandle, order, a));
int info = 0;
HandleResults(SafeNativeMethods.z_cholesky_factor(_solverHandle, order, a, ref info));
if (info > 0)
{
throw new ArgumentException(Resources.ArgumentMatrixPositiveDefinite);
}
}
/// <summary>
@ -501,7 +516,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.z_cholesky_solve(_solverHandle, orderA, columnsB, a, b));
int info = 0;
HandleResults(SafeNativeMethods.z_cholesky_solve(_solverHandle, orderA, columnsB, a, b, ref info));
}
/// <summary>
@ -535,7 +551,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.z_cholesky_solve_factored(_solverHandle, orderA, columnsB, a, b));
int info = 0;
HandleResults(SafeNativeMethods.z_cholesky_solve_factored(_solverHandle, orderA, columnsB, a, b, ref info));
}
/// <summary>
@ -697,7 +714,13 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
if (columnsA > rowsA || !computeVectors) // see remarks http://docs.nvidia.com/cuda/cusolver/index.html#cuds-lt-t-gt-gesvd
base.SingularValueDecomposition(computeVectors, a, rowsA, columnsA, s, u, vt, new Complex[rowsA]);
else Solver(SafeNativeMethods.z_svd_factor(_solverHandle, computeVectors, rowsA, columnsA, a, s, u, vt));
else
{
int info = 0;
HandleResults(SafeNativeMethods.z_svd_factor(_solverHandle, computeVectors, rowsA, columnsA, a, s, u, vt, ref info));
if (info != 0)
throw new NonConvergenceException();
}
}
}
}

53
src/Numerics/Providers/LinearAlgebra/Cuda/CudaLinearAlgebraProvider.Complex32.cs

@ -68,7 +68,9 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength);
}
return SafeNativeMethods.c_dot_product(_blasHandle, x.Length, x, y);
Complex32 result = 0;
HandleResults(SafeNativeMethods.c_dot_product(_blasHandle, x.Length, x, y, ref result));
return result;
}
/// <summary>
@ -107,7 +109,7 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
return;
}
SafeNativeMethods.c_axpy(_blasHandle, y.Length, alpha, x, result);
HandleResults(SafeNativeMethods.c_axpy(_blasHandle, y.Length, alpha, x, result));
}
/// <summary>
@ -135,7 +137,7 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
return;
}
SafeNativeMethods.c_scale(_blasHandle, x.Length, alpha, result);
HandleResults(SafeNativeMethods.c_scale(_blasHandle, x.Length, alpha, result));
}
/// <summary>
@ -202,7 +204,7 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentMatrixDimensions);
}
SafeNativeMethods.c_matrix_multiply(_blasHandle, transposeA.ToCUDA(), transposeB.ToCUDA(), m, n, k, alpha, a, b, beta, c);
HandleResults(SafeNativeMethods.c_matrix_multiply(_blasHandle, transposeA.ToCUDA(), transposeB.ToCUDA(), m, n, k, alpha, a, b, beta, c));
}
/// <summary>
@ -237,7 +239,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "ipiv");
}
Solver(SafeNativeMethods.c_lu_factor(_solverHandle, order, data, ipiv));
int info = 0;
HandleResults(SafeNativeMethods.c_lu_factor(_solverHandle, order, data, ipiv, ref info));
}
/// <summary>
@ -259,7 +262,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "a");
}
Solver(SafeNativeMethods.c_lu_inverse(_solverHandle, _blasHandle, order, a));
int info = 0;
HandleResults(SafeNativeMethods.c_lu_inverse(_solverHandle, _blasHandle, order, a, ref info));
}
/// <summary>
@ -292,7 +296,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "ipiv");
}
BLAS(SafeNativeMethods.c_lu_inverse_factored(_blasHandle, order, a, ipiv));
int info = 0;
HandleResults(SafeNativeMethods.c_lu_inverse_factored(_blasHandle, order, a, ipiv, ref info));
}
/// <summary>
@ -320,7 +325,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.UserWorkBufferNotSupported);
}
Solver(SafeNativeMethods.c_lu_inverse(_solverHandle, _blasHandle, order, a));
int info = 0;
HandleResults(SafeNativeMethods.c_lu_inverse(_solverHandle, _blasHandle, order, a, ref info));
}
/// <summary>
@ -359,7 +365,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.UserWorkBufferNotSupported);
}
BLAS(SafeNativeMethods.c_lu_inverse_factored(_blasHandle, order, a, ipiv));
int info = 0;
HandleResults(SafeNativeMethods.c_lu_inverse_factored(_blasHandle, order, a, ipiv, ref info));
}
/// <summary>
@ -393,7 +400,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.c_lu_solve(_solverHandle, order, columnsOfB, a, b));
int info = 0;
HandleResults(SafeNativeMethods.c_lu_solve(_solverHandle, order, columnsOfB, a, b, ref info));
}
/// <summary>
@ -438,7 +446,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.c_lu_solve_factored(_solverHandle, order, columnsOfB, a, ipiv, b));
int info = 0;
HandleResults(SafeNativeMethods.c_lu_solve_factored(_solverHandle, order, columnsOfB, a, ipiv, b, ref info));
}
/// <summary>
@ -466,7 +475,13 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "a");
}
Solver(SafeNativeMethods.c_cholesky_factor(_solverHandle, order, a));
int info = 0;
HandleResults(SafeNativeMethods.c_cholesky_factor(_solverHandle, order, a, ref info));
if (info > 0)
{
throw new ArgumentException(Resources.ArgumentMatrixPositiveDefinite);
}
}
/// <summary>
@ -501,7 +516,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.c_cholesky_solve(_solverHandle, orderA, columnsB, a, b));
int info = 0;
HandleResults(SafeNativeMethods.c_cholesky_solve(_solverHandle, orderA, columnsB, a, b, ref info));
}
/// <summary>
@ -535,7 +551,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.c_cholesky_solve_factored(_solverHandle, orderA, columnsB, a, b));
int info = 0;
HandleResults(SafeNativeMethods.c_cholesky_solve_factored(_solverHandle, orderA, columnsB, a, b, ref info));
}
/// <summary>
@ -697,7 +714,13 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
if (columnsA > rowsA || !computeVectors) // see remarks http://docs.nvidia.com/cuda/cusolver/index.html#cuds-lt-t-gt-gesvd
base.SingularValueDecomposition(computeVectors, a, rowsA, columnsA, s, u, vt, new Complex32[rowsA]);
else Solver(SafeNativeMethods.c_svd_factor(_solverHandle, computeVectors, rowsA, columnsA, a, s, u, vt));
else
{
int info = 0;
HandleResults(SafeNativeMethods.c_svd_factor(_solverHandle, computeVectors, rowsA, columnsA, a, s, u, vt, ref info));
if (info != 0)
throw new NonConvergenceException();
}
}
}
}

53
src/Numerics/Providers/LinearAlgebra/Cuda/CudaLinearAlgebraProvider.Double.cs

@ -68,7 +68,9 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength);
}
return SafeNativeMethods.d_dot_product(_blasHandle, x.Length, x, y);
double result = 0;
HandleResults(SafeNativeMethods.d_dot_product(_blasHandle, x.Length, x, y, ref result));
return result;
}
/// <summary>
@ -107,7 +109,7 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
return;
}
SafeNativeMethods.d_axpy(_blasHandle, y.Length, alpha, x, result);
HandleResults(SafeNativeMethods.d_axpy(_blasHandle, y.Length, alpha, x, result));
}
/// <summary>
@ -135,7 +137,7 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
return;
}
SafeNativeMethods.d_scale(_blasHandle, x.Length, alpha, result);
HandleResults(SafeNativeMethods.d_scale(_blasHandle, x.Length, alpha, result));
}
/// <summary>
@ -202,7 +204,7 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentMatrixDimensions);
}
SafeNativeMethods.d_matrix_multiply(_blasHandle, transposeA.ToCUDA(), transposeB.ToCUDA(), m, n, k, alpha, a, b, beta, c);
HandleResults(SafeNativeMethods.d_matrix_multiply(_blasHandle, transposeA.ToCUDA(), transposeB.ToCUDA(), m, n, k, alpha, a, b, beta, c));
}
/// <summary>
@ -237,7 +239,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "ipiv");
}
Solver(SafeNativeMethods.d_lu_factor(_solverHandle, order, data, ipiv));
int info = 0;
HandleResults(SafeNativeMethods.d_lu_factor(_solverHandle, order, data, ipiv, ref info));
}
/// <summary>
@ -259,7 +262,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "a");
}
Solver(SafeNativeMethods.d_lu_inverse(_solverHandle, _blasHandle, order, a));
int info = 0;
HandleResults(SafeNativeMethods.d_lu_inverse(_solverHandle, _blasHandle, order, a, ref info));
}
/// <summary>
@ -292,7 +296,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "ipiv");
}
BLAS(SafeNativeMethods.d_lu_inverse_factored(_blasHandle, order, a, ipiv));
int info = 0;
HandleResults(SafeNativeMethods.d_lu_inverse_factored(_blasHandle, order, a, ipiv, ref info));
}
/// <summary>
@ -320,7 +325,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.UserWorkBufferNotSupported);
}
Solver(SafeNativeMethods.d_lu_inverse(_solverHandle, _blasHandle, order, a));
int info = 0;
HandleResults(SafeNativeMethods.d_lu_inverse(_solverHandle, _blasHandle, order, a, ref info));
}
/// <summary>
@ -359,7 +365,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.UserWorkBufferNotSupported);
}
BLAS(SafeNativeMethods.d_lu_inverse_factored(_blasHandle, order, a, ipiv));
int info = 0;
HandleResults(SafeNativeMethods.d_lu_inverse_factored(_blasHandle, order, a, ipiv, ref info));
}
/// <summary>
@ -393,7 +400,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.d_lu_solve(_solverHandle, order, columnsOfB, a, b));
int info = 0;
HandleResults(SafeNativeMethods.d_lu_solve(_solverHandle, order, columnsOfB, a, b, ref info));
}
/// <summary>
@ -438,7 +446,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.d_lu_solve_factored(_solverHandle, order, columnsOfB, a, ipiv, b));
int info = 0;
HandleResults(SafeNativeMethods.d_lu_solve_factored(_solverHandle, order, columnsOfB, a, ipiv, b, ref info));
}
/// <summary>
@ -466,7 +475,13 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "a");
}
Solver(SafeNativeMethods.d_cholesky_factor(_solverHandle, order, a));
int info = 0;
HandleResults(SafeNativeMethods.d_cholesky_factor(_solverHandle, order, a, ref info));
if (info > 0)
{
throw new ArgumentException(Resources.ArgumentMatrixPositiveDefinite);
}
}
/// <summary>
@ -501,7 +516,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.d_cholesky_solve(_solverHandle, orderA, columnsB, a, b));
int info = 0;
HandleResults(SafeNativeMethods.d_cholesky_solve(_solverHandle, orderA, columnsB, a, b, ref info));
}
/// <summary>
@ -535,7 +551,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.d_cholesky_solve_factored(_solverHandle, orderA, columnsB, a, b));
int info = 0;
HandleResults(SafeNativeMethods.d_cholesky_solve_factored(_solverHandle, orderA, columnsB, a, b, ref info));
}
/// <summary>
@ -697,7 +714,13 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
if (columnsA > rowsA || !computeVectors) // see remarks http://docs.nvidia.com/cuda/cusolver/index.html#cuds-lt-t-gt-gesvd
base.SingularValueDecomposition(computeVectors, a, rowsA, columnsA, s, u, vt, new double[rowsA]);
else Solver (SafeNativeMethods.d_svd_factor(_solverHandle, computeVectors, rowsA, columnsA, a, s, u, vt));
else
{
int info = 0;
HandleResults(SafeNativeMethods.d_svd_factor(_solverHandle, computeVectors, rowsA, columnsA, a, s, u, vt, ref info));
if (info != 0)
throw new NonConvergenceException();
}
}
}
}

53
src/Numerics/Providers/LinearAlgebra/Cuda/CudaLinearAlgebraProvider.Single.cs

@ -68,7 +68,9 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength);
}
return SafeNativeMethods.s_dot_product(_blasHandle, x.Length, x, y);
float result = 0;
HandleResults(SafeNativeMethods.s_dot_product(_blasHandle, x.Length, x, y, ref result));
return result;
}
/// <summary>
@ -107,7 +109,7 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
return;
}
SafeNativeMethods.s_axpy(_blasHandle, y.Length, alpha, x, result);
HandleResults(SafeNativeMethods.s_axpy(_blasHandle, y.Length, alpha, x, result));
}
/// <summary>
@ -135,7 +137,7 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
return;
}
SafeNativeMethods.s_scale(_blasHandle, x.Length, alpha, result);
HandleResults(SafeNativeMethods.s_scale(_blasHandle, x.Length, alpha, result));
}
/// <summary>
@ -202,7 +204,7 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentMatrixDimensions);
}
SafeNativeMethods.s_matrix_multiply(_blasHandle, transposeA.ToCUDA(), transposeB.ToCUDA(), m, n, k, alpha, a, b, beta, c);
HandleResults(SafeNativeMethods.s_matrix_multiply(_blasHandle, transposeA.ToCUDA(), transposeB.ToCUDA(), m, n, k, alpha, a, b, beta, c));
}
/// <summary>
@ -237,7 +239,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "ipiv");
}
Solver(SafeNativeMethods.s_lu_factor(_solverHandle, order, data, ipiv));
int info = 0;
HandleResults(SafeNativeMethods.s_lu_factor(_solverHandle, order, data, ipiv, ref info));
}
/// <summary>
@ -259,7 +262,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "a");
}
Solver(SafeNativeMethods.s_lu_inverse(_solverHandle, _blasHandle, order, a));
int info = 0;
HandleResults(SafeNativeMethods.s_lu_inverse(_solverHandle, _blasHandle, order, a, ref info));
}
/// <summary>
@ -292,7 +296,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "ipiv");
}
BLAS(SafeNativeMethods.s_lu_inverse_factored(_blasHandle, order, a, ipiv));
int info = 0;
HandleResults(SafeNativeMethods.s_lu_inverse_factored(_blasHandle, order, a, ipiv, ref info));
}
/// <summary>
@ -320,7 +325,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.UserWorkBufferNotSupported);
}
Solver(SafeNativeMethods.s_lu_inverse(_solverHandle, _blasHandle, order, a));
int info = 0;
HandleResults(SafeNativeMethods.s_lu_inverse(_solverHandle, _blasHandle, order, a, ref info));
}
/// <summary>
@ -359,7 +365,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.UserWorkBufferNotSupported);
}
BLAS(SafeNativeMethods.s_lu_inverse_factored(_blasHandle, order, a, ipiv));
int info = 0;
HandleResults(SafeNativeMethods.s_lu_inverse_factored(_blasHandle, order, a, ipiv, ref info));
}
/// <summary>
@ -393,7 +400,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.s_lu_solve(_solverHandle, order, columnsOfB, a, b));
int info = 0;
HandleResults(SafeNativeMethods.s_lu_solve(_solverHandle, order, columnsOfB, a, b, ref info));
}
/// <summary>
@ -438,7 +446,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.s_lu_solve_factored(_solverHandle, order, columnsOfB, a, ipiv, b));
int info = 0;
HandleResults(SafeNativeMethods.s_lu_solve_factored(_solverHandle, order, columnsOfB, a, ipiv, b, ref info));
}
/// <summary>
@ -466,7 +475,13 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentArraysSameLength, "a");
}
Solver(SafeNativeMethods.s_cholesky_factor(_solverHandle, order, a));
int info = 0;
HandleResults(SafeNativeMethods.s_cholesky_factor(_solverHandle, order, a, ref info));
if (info > 0)
{
throw new ArgumentException(Resources.ArgumentMatrixPositiveDefinite);
}
}
/// <summary>
@ -501,7 +516,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.s_cholesky_solve(_solverHandle, orderA, columnsB, a, b));
int info = 0;
HandleResults(SafeNativeMethods.s_cholesky_solve(_solverHandle, orderA, columnsB, a, b, ref info));
}
/// <summary>
@ -535,7 +551,8 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
throw new ArgumentException(Resources.ArgumentReferenceDifferent);
}
Solver(SafeNativeMethods.s_cholesky_solve_factored(_solverHandle, orderA, columnsB, a, b));
int info = 0;
HandleResults(SafeNativeMethods.s_cholesky_solve_factored(_solverHandle, orderA, columnsB, a, b, ref info));
}
/// <summary>
@ -697,7 +714,13 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
if (columnsA > rowsA || !computeVectors) // see remarks http://docs.nvidia.com/cuda/cusolver/index.html#cuds-lt-t-gt-gesvd
base.SingularValueDecomposition(computeVectors, a, rowsA, columnsA, s, u, vt, new float[rowsA]);
else Solver(SafeNativeMethods.s_svd_factor(_solverHandle, computeVectors, rowsA, columnsA, a, s, u, vt));
else
{
int info = 0;
HandleResults(SafeNativeMethods.s_svd_factor(_solverHandle, computeVectors, rowsA, columnsA, a, s, u, vt, ref info));
if (info != 0)
throw new NonConvergenceException();
}
}
}
}

100
src/Numerics/Providers/LinearAlgebra/Cuda/CudaLinearAlgebraProvider.cs

@ -83,109 +83,35 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
if (a != 0 || b != -1 || linearAlgebra <=0 || _nativeRevision < 1)
{
throw new NotSupportedException("Cuda Native Provider too old or not compatible. Consider upgrading to a newer version.");
throw new NotSupportedException("Cuda Native Provider not present, too old or not compatible. Consider upgrading to a newer version.");
}
BLAS(SafeNativeMethods.createBLASHandle(ref _blasHandle));
Solver(SafeNativeMethods.createSolverHandle(ref _solverHandle));
HandleResults(SafeNativeMethods.createBLASHandle(ref _blasHandle));
HandleResults(SafeNativeMethods.createSolverHandle(ref _solverHandle));
}
private void BLAS(int status)
private void HandleResults(CudaResults results)
{
switch (status)
{
case 0: // CUBLAS_STATUS_SUCCESS
return;
case 1: // CUBLAS_STATUS_NOT_INITIALIZED
throw new Exception("The CUDA Runtime initialization failed");
case 2: // CUSOLVER_STATUS_ALLOC_FAILED
throw new OutOfMemoryException("The resources could not be allocated");
case 7: // CUBLAS_STATUS_INVALID_VALUE
throw new ArgumentException("Invalid value");
case 8: // CUBLAS_STATUS_ARCH_MISMATCH
throw new NotSupportedException("The device does not support this opeation.");
case 11: // CUBLAS_STATUS_MAPPING_ERROR
throw new Exception("Mapping error.");
case 13: // CUBLAS_STATUS_EXECUTION_FAILED
throw new Exception("Execution failed");
if (results.Error != 0)
throw new CudaException(results.Error);
case 14: // CUBLAS_STATUS_INTERNAL_ERROR
throw new Exception("Internal error");
if (results.BlasStatus != 0)
throw new CuBLASException(results.BlasStatus);
case 15: // CUBLAS_STATUS_NOT_SUPPORTED
throw new NotSupportedException();
case 16: // CUBLAS_STATUS_LICENSE_ERROR
throw new Exception("License error");
default:
throw new Exception("Unrecognized cuBLAS status code: " + status);
}
}
private void Solver(int status)
{
switch (status)
{
case 0: // CUSOLVER_STATUS_SUCCESS
return;
case 1: // CUSOLVER_STATUS_NOT_INITIALIZED
throw new Exception("The library was not initialized");
case 2: // CUSOLVER_STATUS_ALLOC_FAILED
throw new OutOfMemoryException("The resources could not be allocated");
case 3: // CUSOLVER_STATUS_INVALID_VALUE
throw new ArgumentException("Invalid value");
case 4: // CUSOLVER_STATUS_ARCH_MISMATCH
throw new NotSupportedException("The device does not support compute capability 2.0 and above");
case 5: // CUSOLVER_STATUS_MAPPING_ERROR
throw new Exception("Mapping error");
case 6: // CUSOLVER_STATUS_EXECUTION_FAILED
throw new NonConvergenceException("Execution failed");
case 7: //CUSOLVER_STATUS_INTERNAL_ERROR
throw new Exception("Internal error");
case 8: // CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED
throw new ArgumentException("Matrix type not supported");
case 9: // CUSOLVER_STATUS_NOT_SUPPORTED
throw new NotSupportedException();
case 10: // CUSOLVER_STATUS_ZERO_PIVOT
throw new Exception("Zero pivot");
case 11: //CUSOLVER_STATUS_INVALID_LICENSE
throw new Exception("Invalid license");
default:
throw new Exception("Unrecognized cuSolverDn status code: " + status);
}
if (results.SolverStatus != 0)
throw new CuSolverException(results.SolverStatus);
}
public override string ToString()
{
return string.Format("Nvidia CUDA ({1}; revision {0})",
_nativeRevision,
_nativeIX86 ? "x86" : _nativeX64 ? "x64" : _nativeIA64 ? "IA64" : "unknown");
return string.Format("Nvidia CUDA ({1}; compute capability {0})", _nativeRevision, _nativeIX86 ? "x86" : _nativeX64 ? "x64" : _nativeIA64 ? "IA64" : "unknown");
}
public void Dispose()
{
BLAS(SafeNativeMethods.destroyBLASHandle(_blasHandle));
Solver(SafeNativeMethods.destroySolverHandle(_solverHandle));
HandleResults(SafeNativeMethods.destroyBLASHandle(_blasHandle));
HandleResults(SafeNativeMethods.destroySolverHandle(_solverHandle));
}
}
}

58
src/Numerics/Providers/LinearAlgebra/Cuda/CudaResults.cs

@ -0,0 +1,58 @@
// <copyright file="CudaResults.cs" company="Math.NET">
// Math.NET Numerics, part of the Math.NET Project
// http://numerics.mathdotnet.com
// http://github.com/mathnet/mathnet-numerics
// http://mathnetnumerics.codeplex.com
//
// Copyright (c) 2009-2013 Math.NET
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// </copyright>
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
{
/// <summary>
/// Struct containing the various results from different CUDA API calls.
/// </summary>
internal struct CudaResults
{
/// <summary>
/// Maps to cudaError_t
/// </summary>
public int Error;
/// <summary>
/// Maps to cublasStatus_t
/// </summary>
public int BlasStatus;
/// <summary>
/// Maps to cusolverStatus_t
/// </summary>
public int SolverStatus;
}
}

112
src/Numerics/Providers/LinearAlgebra/Cuda/SafeNativeMethods.cs

@ -54,66 +54,66 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
internal static extern int query_capability(int capability);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int createBLASHandle(ref IntPtr blasHandle);
internal static extern CudaResults createBLASHandle(ref IntPtr blasHandle);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int destroyBLASHandle(IntPtr blasHandle);
internal static extern CudaResults destroyBLASHandle(IntPtr blasHandle);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int createSolverHandle(ref IntPtr solverHandle);
internal static extern CudaResults createSolverHandle(ref IntPtr solverHandle);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int destroySolverHandle(IntPtr solverHandle);
internal static extern CudaResults destroySolverHandle(IntPtr solverHandle);
#region BLAS
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void s_axpy(IntPtr blasHandle, int n, float alpha, float[] x, [In, Out] float[] y);
internal static extern CudaResults s_axpy(IntPtr blasHandle, int n, float alpha, float[] x, [In, Out] float[] y);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void d_axpy(IntPtr blasHandle, int n, double alpha, double[] x, [In, Out] double[] y);
internal static extern CudaResults d_axpy(IntPtr blasHandle, int n, double alpha, double[] x, [In, Out] double[] y);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void c_axpy(IntPtr blasHandle, int n, Complex32 alpha, Complex32[] x, [In, Out] Complex32[] y);
internal static extern CudaResults c_axpy(IntPtr blasHandle, int n, Complex32 alpha, Complex32[] x, [In, Out] Complex32[] y);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void z_axpy(IntPtr blasHandle, int n, Complex alpha, Complex[] x, [In, Out] Complex[] y);
internal static extern CudaResults z_axpy(IntPtr blasHandle, int n, Complex alpha, Complex[] x, [In, Out] Complex[] y);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void s_scale(IntPtr blasHandle, int n, float alpha, [Out] float[] x);
internal static extern CudaResults s_scale(IntPtr blasHandle, int n, float alpha, [Out] float[] x);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void d_scale(IntPtr blasHandle, int n, double alpha, [Out] double[] x);
internal static extern CudaResults d_scale(IntPtr blasHandle, int n, double alpha, [Out] double[] x);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void c_scale(IntPtr blasHandle, int n, Complex32 alpha, [In, Out] Complex32[] x);
internal static extern CudaResults c_scale(IntPtr blasHandle, int n, Complex32 alpha, [In, Out] Complex32[] x);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void z_scale(IntPtr blasHandle, int n, Complex alpha, [In, Out] Complex[] x);
internal static extern CudaResults z_scale(IntPtr blasHandle, int n, Complex alpha, [In, Out] Complex[] x);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern float s_dot_product(IntPtr blasHandle, int n, float[] x, float[] y);
internal static extern CudaResults s_dot_product(IntPtr blasHandle, int n, float[] x, float[] y, ref float result);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern double d_dot_product(IntPtr blasHandle, int n, double[] x, double[] y);
internal static extern CudaResults d_dot_product(IntPtr blasHandle, int n, double[] x, double[] y, ref double result);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern Complex32 c_dot_product(IntPtr blasHandle, int n, Complex32[] x, Complex32[] y);
internal static extern CudaResults c_dot_product(IntPtr blasHandle, int n, Complex32[] x, Complex32[] y, ref Complex32 result);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern Complex z_dot_product(IntPtr blasHandle, int n, Complex[] x, Complex[] y);
internal static extern CudaResults z_dot_product(IntPtr blasHandle, int n, Complex[] x, Complex[] y, ref Complex result);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void s_matrix_multiply(IntPtr blasHandle, int transA, int transB, int m, int n, int k, float alpha, float[] x, float[] y, float beta, [In, Out] float[] c);
internal static extern CudaResults s_matrix_multiply(IntPtr blasHandle, int transA, int transB, int m, int n, int k, float alpha, float[] x, float[] y, float beta, [In, Out] float[] c);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void d_matrix_multiply(IntPtr blasHandle, int transA, int transB, int m, int n, int k, double alpha, double[] x, double[] y, double beta, [In, Out] double[] c);
internal static extern CudaResults d_matrix_multiply(IntPtr blasHandle, int transA, int transB, int m, int n, int k, double alpha, double[] x, double[] y, double beta, [In, Out] double[] c);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void c_matrix_multiply(IntPtr blasHandle, int transA, int transB, int m, int n, int k, Complex32 alpha, Complex32[] x, Complex32[] y, Complex32 beta, [In, Out] Complex32[] c);
internal static extern CudaResults c_matrix_multiply(IntPtr blasHandle, int transA, int transB, int m, int n, int k, Complex32 alpha, Complex32[] x, Complex32[] y, Complex32 beta, [In, Out] Complex32[] c);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void z_matrix_multiply(IntPtr blasHandle, int transA, int transB, int m, int n, int k, Complex alpha, Complex[] x, Complex[] y, Complex beta, [In, Out] Complex[] c);
internal static extern CudaResults z_matrix_multiply(IntPtr blasHandle, int transA, int transB, int m, int n, int k, Complex alpha, Complex[] x, Complex[] y, Complex beta, [In, Out] Complex[] c);
internal static int ToCUDA(this Transpose transpose)
{
@ -150,100 +150,100 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
//internal static extern double z_matrix_norm(byte norm, int rows, int columns, [In] Complex[] a, [In, Out] double[] work);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int s_cholesky_factor(IntPtr solverHandle, int n, [In, Out] float[] a);
internal static extern CudaResults s_cholesky_factor(IntPtr solverHandle, int n, [In, Out] float[] a, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int d_cholesky_factor(IntPtr solverHandle, int n, [In, Out] double[] a);
internal static extern CudaResults d_cholesky_factor(IntPtr solverHandle, int n, [In, Out] double[] a, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int c_cholesky_factor(IntPtr solverHandle, int n, [In, Out] Complex32[] a);
internal static extern CudaResults c_cholesky_factor(IntPtr solverHandle, int n, [In, Out] Complex32[] a, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int z_cholesky_factor(IntPtr solverHandle, int n, [In, Out] Complex[] a);
internal static extern CudaResults z_cholesky_factor(IntPtr solverHandle, int n, [In, Out] Complex[] a, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int s_lu_factor(IntPtr solverHandle, int n, [In, Out] float[] a, [In, Out] int[] ipiv);
internal static extern CudaResults s_lu_factor(IntPtr solverHandle, int n, [In, Out] float[] a, [In, Out] int[] ipiv, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int d_lu_factor(IntPtr solverHandle, int n, [In, Out] double[] a, [In, Out] int[] ipiv);
internal static extern CudaResults d_lu_factor(IntPtr solverHandle, int n, [In, Out] double[] a, [In, Out] int[] ipiv, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int c_lu_factor(IntPtr solverHandle, int n, [In, Out] Complex32[] a, [In, Out] int[] ipiv);
internal static extern CudaResults c_lu_factor(IntPtr solverHandle, int n, [In, Out] Complex32[] a, [In, Out] int[] ipiv, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int z_lu_factor(IntPtr solverHandle, int n, [In, Out] Complex[] a, [In, Out] int[] ipiv);
internal static extern CudaResults z_lu_factor(IntPtr solverHandle, int n, [In, Out] Complex[] a, [In, Out] int[] ipiv, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int s_lu_inverse(IntPtr solverHandle, IntPtr blasHandle, int n, [In, Out] float[] a);
internal static extern CudaResults s_lu_inverse(IntPtr solverHandle, IntPtr blasHandle, int n, [In, Out] float[] a, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int d_lu_inverse(IntPtr solverHandle, IntPtr blasHandle, int n, [In, Out] double[] a);
internal static extern CudaResults d_lu_inverse(IntPtr solverHandle, IntPtr blasHandle, int n, [In, Out] double[] a, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int c_lu_inverse(IntPtr solverHandle, IntPtr blasHandle, int n, [In, Out] Complex32[] a);
internal static extern CudaResults c_lu_inverse(IntPtr solverHandle, IntPtr blasHandle, int n, [In, Out] Complex32[] a, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int z_lu_inverse(IntPtr solverHandle, IntPtr blasHandle, int n, [In, Out] Complex[] a);
internal static extern CudaResults z_lu_inverse(IntPtr solverHandle, IntPtr blasHandle, int n, [In, Out] Complex[] a, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int s_lu_inverse_factored(IntPtr blasHandle, int n, [In, Out] float[] a, [In, Out] int[] ipiv);
internal static extern CudaResults s_lu_inverse_factored(IntPtr blasHandle, int n, [In, Out] float[] a, [In, Out] int[] ipiv, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int d_lu_inverse_factored(IntPtr blasHandle, int n, [In, Out] double[] a, [In, Out] int[] ipiv);
internal static extern CudaResults d_lu_inverse_factored(IntPtr blasHandle, int n, [In, Out] double[] a, [In, Out] int[] ipiv, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int c_lu_inverse_factored(IntPtr blasHandle, int n, [In, Out] Complex32[] a, [In, Out] int[] ipiv);
internal static extern CudaResults c_lu_inverse_factored(IntPtr blasHandle, int n, [In, Out] Complex32[] a, [In, Out] int[] ipiv, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int z_lu_inverse_factored(IntPtr blasHandle, int n, [In, Out] Complex[] a, [In, Out] int[] ipiv);
internal static extern CudaResults z_lu_inverse_factored(IntPtr blasHandle, int n, [In, Out] Complex[] a, [In, Out] int[] ipiv, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int s_lu_solve_factored(IntPtr solverHandle, int n, int nrhs, float[] a, [In, Out] int[] ipiv, [In, Out] float[] b);
internal static extern CudaResults s_lu_solve_factored(IntPtr solverHandle, int n, int nrhs, float[] a, [In, Out] int[] ipiv, [In, Out] float[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int d_lu_solve_factored(IntPtr solverHandle, int n, int nrhs, double[] a, [In, Out] int[] ipiv, [In, Out] double[] b);
internal static extern CudaResults d_lu_solve_factored(IntPtr solverHandle, int n, int nrhs, double[] a, [In, Out] int[] ipiv, [In, Out] double[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int c_lu_solve_factored(IntPtr solverHandle, int n, int nrhs, Complex32[] a, [In, Out] int[] ipiv, [In, Out] Complex32[] b);
internal static extern CudaResults c_lu_solve_factored(IntPtr solverHandle, int n, int nrhs, Complex32[] a, [In, Out] int[] ipiv, [In, Out] Complex32[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int z_lu_solve_factored(IntPtr solverHandle, int n, int nrhs, Complex[] a, [In, Out] int[] ipiv, [In, Out] Complex[] b);
internal static extern CudaResults z_lu_solve_factored(IntPtr solverHandle, int n, int nrhs, Complex[] a, [In, Out] int[] ipiv, [In, Out] Complex[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int s_lu_solve(IntPtr solverHandle, int n, int nrhs, float[] a, [In, Out] float[] b);
internal static extern CudaResults s_lu_solve(IntPtr solverHandle, int n, int nrhs, float[] a, [In, Out] float[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int d_lu_solve(IntPtr solverHandle, int n, int nrhs, double[] a, [In, Out] double[] b);
internal static extern CudaResults d_lu_solve(IntPtr solverHandle, int n, int nrhs, double[] a, [In, Out] double[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int c_lu_solve(IntPtr solverHandle, int n, int nrhs, Complex32[] a, [In, Out] Complex32[] b);
internal static extern CudaResults c_lu_solve(IntPtr solverHandle, int n, int nrhs, Complex32[] a, [In, Out] Complex32[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int z_lu_solve(IntPtr solverHandle, int n, int nrhs, Complex[] a, [In, Out] Complex[] b);
internal static extern CudaResults z_lu_solve(IntPtr solverHandle, int n, int nrhs, Complex[] a, [In, Out] Complex[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int s_cholesky_solve(IntPtr solverHandle, int n, int nrhs, float[] a, [In, Out] float[] b);
internal static extern CudaResults s_cholesky_solve(IntPtr solverHandle, int n, int nrhs, float[] a, [In, Out] float[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int d_cholesky_solve(IntPtr solverHandle, int n, int nrhs, double[] a, [In, Out] double[] b);
internal static extern CudaResults d_cholesky_solve(IntPtr solverHandle, int n, int nrhs, double[] a, [In, Out] double[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int c_cholesky_solve(IntPtr solverHandle, int n, int nrhs, Complex32[] a, [In, Out] Complex32[] b);
internal static extern CudaResults c_cholesky_solve(IntPtr solverHandle, int n, int nrhs, Complex32[] a, [In, Out] Complex32[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int z_cholesky_solve(IntPtr solverHandle, int n, int nrhs, Complex[] a, [In, Out] Complex[] b);
internal static extern CudaResults z_cholesky_solve(IntPtr solverHandle, int n, int nrhs, Complex[] a, [In, Out] Complex[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int s_cholesky_solve_factored(IntPtr solverHandle, int n, int nrhs, float[] a, [In, Out] float[] b);
internal static extern CudaResults s_cholesky_solve_factored(IntPtr solverHandle, int n, int nrhs, float[] a, [In, Out] float[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int d_cholesky_solve_factored(IntPtr solverHandle, int n, int nrhs, double[] a, [In, Out] double[] b);
internal static extern CudaResults d_cholesky_solve_factored(IntPtr solverHandle, int n, int nrhs, double[] a, [In, Out] double[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int c_cholesky_solve_factored(IntPtr solverHandle, int n, int nrhs, Complex32[] a, [In, Out] Complex32[] b);
internal static extern CudaResults c_cholesky_solve_factored(IntPtr solverHandle, int n, int nrhs, Complex32[] a, [In, Out] Complex32[] b, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int z_cholesky_solve_factored(IntPtr solverHandle, int n, int nrhs, Complex[] a, [In, Out] Complex[] b);
internal static extern CudaResults z_cholesky_solve_factored(IntPtr solverHandle, int n, int nrhs, Complex[] a, [In, Out] Complex[] b, ref int info);
//[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
//internal static extern int s_qr_factor(int m, int n, [In, Out] float[] r, [In, Out] float[] tau, [In, Out] float[] q, [In, Out] float[] work, int len);
@ -294,16 +294,16 @@ namespace MathNet.Numerics.Providers.LinearAlgebra.Cuda
//internal static extern int z_qr_solve_factored(int m, int n, int bn, Complex[] r, Complex[] b, Complex[] tau, [In, Out] Complex[] x, [In, Out] Complex[] work, int len);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int s_svd_factor(IntPtr solverHandle, [MarshalAs(UnmanagedType.U1)] bool computeVectors, int m, int n, [In, Out] float[] a, [In, Out] float[] s, [In, Out] float[] u, [In, Out] float[] v);
internal static extern CudaResults s_svd_factor(IntPtr solverHandle, [MarshalAs(UnmanagedType.U1)] bool computeVectors, int m, int n, [In, Out] float[] a, [In, Out] float[] s, [In, Out] float[] u, [In, Out] float[] v, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int d_svd_factor(IntPtr solverHandle, [MarshalAs(UnmanagedType.U1)] bool computeVectors, int m, int n, [In, Out] double[] a, [In, Out] double[] s, [In, Out] double[] u, [In, Out] double[] v);
internal static extern CudaResults d_svd_factor(IntPtr solverHandle, [MarshalAs(UnmanagedType.U1)] bool computeVectors, int m, int n, [In, Out] double[] a, [In, Out] double[] s, [In, Out] double[] u, [In, Out] double[] v, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int c_svd_factor(IntPtr solverHandle, [MarshalAs(UnmanagedType.U1)] bool computeVectors, int m, int n, [In, Out] Complex32[] a, [In, Out] Complex32[] s, [In, Out] Complex32[] u, [In, Out] Complex32[] v);
internal static extern CudaResults c_svd_factor(IntPtr solverHandle, [MarshalAs(UnmanagedType.U1)] bool computeVectors, int m, int n, [In, Out] Complex32[] a, [In, Out] Complex32[] s, [In, Out] Complex32[] u, [In, Out] Complex32[] v, ref int info);
[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern int z_svd_factor(IntPtr solverHandle, [MarshalAs(UnmanagedType.U1)] bool computeVectors, int m, int n, [In, Out] Complex[] a, [In, Out] Complex[] s, [In, Out] Complex[] u, [In, Out] Complex[] v);
internal static extern CudaResults z_svd_factor(IntPtr solverHandle, [MarshalAs(UnmanagedType.U1)] bool computeVectors, int m, int n, [In, Out] Complex[] a, [In, Out] Complex[] s, [In, Out] Complex[] u, [In, Out] Complex[] v, ref int info);
//[DllImport(_DllName, ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
//internal static extern int s_eigen([MarshalAs(UnmanagedType.U1)] bool isSymmetric, int n, [In] float[] a, [In, Out] float[] vectors, [In, Out] Complex[] values, [In, Out] float[] d);

Loading…
Cancel
Save