|
|
|
@ -4,7 +4,7 @@ |
|
|
|
#include "wrapper_common.h" |
|
|
|
|
|
|
|
template<typename T, typename AXPY> |
|
|
|
void cuda_axpy(const cublasHandle_t blasHandle, const int n, const T *alpha, const T x[], int incX, T y[], int incY, AXPY axpy) |
|
|
|
void cuda_axpy(const cublasHandle_t blasHandle, const int n, const T alpha, const T x[], int incX, T y[], int incY, AXPY axpy) |
|
|
|
{ |
|
|
|
T *d_X = NULL; |
|
|
|
T *d_Y = NULL; |
|
|
|
@ -14,7 +14,7 @@ void cuda_axpy(const cublasHandle_t blasHandle, const int n, const T *alpha, con |
|
|
|
cublasSetVector(n, sizeof(T), x, incX, d_X, incX); |
|
|
|
cublasSetVector(n, sizeof(T), y, incY, d_Y, incY); |
|
|
|
|
|
|
|
axpy(blasHandle, n, alpha, d_X, incX, d_Y, incX); |
|
|
|
axpy(blasHandle, n, &alpha, d_X, incX, d_Y, incX); |
|
|
|
|
|
|
|
cublasGetVector(n, sizeof(T), d_Y, incY, y, incY); |
|
|
|
|
|
|
|
@ -23,14 +23,14 @@ void cuda_axpy(const cublasHandle_t blasHandle, const int n, const T *alpha, con |
|
|
|
} |
|
|
|
|
|
|
|
template<typename T, typename SCAL> |
|
|
|
void cuda_scal(const cublasHandle_t blasHandle, const int n, const T *alpha, T x[], int incX, SCAL scal) |
|
|
|
void cuda_scal(const cublasHandle_t blasHandle, const int n, const T alpha, T x[], int incX, SCAL scal) |
|
|
|
{ |
|
|
|
T *d_X = NULL; |
|
|
|
cudaMalloc((void**)&d_X, n*sizeof(T)); |
|
|
|
|
|
|
|
cublasSetVector(n, sizeof(T), x, incX, d_X, incX); |
|
|
|
|
|
|
|
scal(blasHandle, n, alpha, d_X, incX); |
|
|
|
scal(blasHandle, n, &alpha, d_X, incX); |
|
|
|
|
|
|
|
cublasGetVector(n, sizeof(T), d_X, incX, x, incX); |
|
|
|
|
|
|
|
@ -81,35 +81,35 @@ void cuda_gemm(const cublasHandle_t handle, const cublasOperation_t transa, cons |
|
|
|
extern "C" { |
|
|
|
|
|
|
|
DLLEXPORT void s_axpy(const cublasHandle_t blasHandle, const int n, const float alpha, const float x[], float y[]){ |
|
|
|
cuda_axpy(blasHandle, n, &alpha, x, 1, y, 1, cublasSaxpy); |
|
|
|
cuda_axpy(blasHandle, n, alpha, x, 1, y, 1, cublasSaxpy); |
|
|
|
} |
|
|
|
|
|
|
|
DLLEXPORT void d_axpy(const cublasHandle_t blasHandle, const int n, const double alpha, const double x[], double y[]){ |
|
|
|
cuda_axpy(blasHandle, n, &alpha, x, 1, y, 1, cublasDaxpy); |
|
|
|
cuda_axpy(blasHandle, n, alpha, x, 1, y, 1, cublasDaxpy); |
|
|
|
} |
|
|
|
|
|
|
|
DLLEXPORT void c_axpy(const cublasHandle_t blasHandle, const int n, const cuComplex alpha, const cuComplex x[], cuComplex y[]){ |
|
|
|
cuda_axpy(blasHandle, n, &alpha, x, 1, y, 1, cublasCaxpy); |
|
|
|
cuda_axpy(blasHandle, n, alpha, x, 1, y, 1, cublasCaxpy); |
|
|
|
} |
|
|
|
|
|
|
|
DLLEXPORT void z_axpy(const cublasHandle_t blasHandle, const int n, const cuDoubleComplex alpha, const cuDoubleComplex x[], cuDoubleComplex y[]){ |
|
|
|
cuda_axpy(blasHandle, n, &alpha, x, 1, y, 1, cublasZaxpy); |
|
|
|
cuda_axpy(blasHandle, n, alpha, x, 1, y, 1, cublasZaxpy); |
|
|
|
} |
|
|
|
|
|
|
|
DLLEXPORT void s_scale(const cublasHandle_t blasHandle, const int n, const float alpha, float x[]){ |
|
|
|
cuda_scal(blasHandle, n, &alpha, x, 1, cublasSscal); |
|
|
|
cuda_scal(blasHandle, n, alpha, x, 1, cublasSscal); |
|
|
|
} |
|
|
|
|
|
|
|
DLLEXPORT void d_scale(const cublasHandle_t blasHandle, const int n, const double alpha, double x[]){ |
|
|
|
cuda_scal(blasHandle, n, &alpha, x, 1, cublasDscal); |
|
|
|
cuda_scal(blasHandle, n, alpha, x, 1, cublasDscal); |
|
|
|
} |
|
|
|
|
|
|
|
DLLEXPORT void c_scale(const cublasHandle_t blasHandle, const int n, const cuComplex alpha, cuComplex x[]){ |
|
|
|
cuda_scal(blasHandle, n, &alpha, x, 1, cublasCscal); |
|
|
|
cuda_scal(blasHandle, n, alpha, x, 1, cublasCscal); |
|
|
|
} |
|
|
|
|
|
|
|
DLLEXPORT void z_scale(const cublasHandle_t blasHandle, const int n, const cuDoubleComplex alpha, cuDoubleComplex x[]){ |
|
|
|
cuda_scal(blasHandle, n, &alpha, x, 1, cublasZscal); |
|
|
|
cuda_scal(blasHandle, n, alpha, x, 1, cublasZscal); |
|
|
|
} |
|
|
|
|
|
|
|
DLLEXPORT float s_dot_product(const cublasHandle_t blasHandle, const int n, const float x[], const float y[]){ |
|
|
|
|