Hi,
I am trying to multiply multiple matrices from within MATLAB by calling cublasSgemmBatched in a MEX file.
My MATLAB code is very simple:
gpuDevice(1);
a = single(rand(400,10,1500,'gpuArray'));
b = single(rand(10,12,1500,'gpuArray'));
c = MatCuda(a,b)
I get the following error:
Error using gpuArray/subsref
An unexpected error occurred during CUDA execution. The CUDA error was:
unknown error
and here’s the mexFunction code:
void mexFunction( int nlhs, mxArray *plhs,
int nrhs, const mxArray *prhs){
char const * const errId = "parallel:gpu:mexGPUExample:InvalidInput";
char const * const errMsg = "Invalid input to MEX file.";
/* Declare all variables.*/
mxGPUArray const *A;
mxGPUArray const *B;
mxGPUArray *C;
/* Initialize the MathWorks GPU API. */
mxInitGPU();
/* Throw an error if the input is not a GPU array. */
if ((nrhs != 2) || !(mxIsGPUArray(prhs[0])) || !(mxIsGPUArray(prhs[1]))) {
mexErrMsgIdAndTxt(errId, errMsg);
}
A = mxGPUCreateFromMxArray(prhs[0]);
B = mxGPUCreateFromMxArray(prhs[1]);
if ((mxGPUGetClassID(A) != mxSINGLE_CLASS) || (mxGPUGetClassID(B) != mxSINGLE_CLASS)) {
mexErrMsgIdAndTxt(errId, errMsg);
}
float const *d_A;
float const *d_B;
d_A = (float const *)(mxGPUGetDataReadOnly(A));
d_B = (float const *)(mxGPUGetDataReadOnly(B));
const mwSize *dimsA = mxGPUGetDimensions(A);
size_t nrowsA = dimsA[0];
size_t ncolsA = dimsA[1];
size_t nMatricesA = dimsA[2];
mxFree((void*) dimsA);
const mwSize *dimsB = mxGPUGetDimensions(B);
size_t nrowsB = dimsB[0];
size_t ncolsB = dimsB[1];
size_t nMatricesB = dimsB[2];
mxFree((void*)dimsB);
size_t nrowsC = nrowsA;
size_t ncolsC = ncolsB;
mwSize dimsC[3] = { nrowsA, ncolsB, nMatricesB };
C = mxGPUCreateGPUArray(mxGPUGetNumberOfDimensions(A),
dimsC,
mxGPUGetClassID(A),
mxGPUGetComplexity(A),
MX_GPU_DO_NOT_INITIALIZE);
float *d_C;
d_C = (float *)(mxGPUGetData(C));
cublasHandle_t handle;
cublasStatus_t ret;
ret = cublasCreate(&handle);
if (ret != CUBLAS_STATUS_SUCCESS)
{
printf("cublasCreate returned error code %d, line(%d)\n", ret, __LINE__);
exit(EXIT_FAILURE);
}
const float alpha = 1.0f;
const float beta = 0.0f;
ret = cublasSgemmBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, nrowsA, ncolsB, ncolsA, &alpha, &d_A, nrowsA, &d_B, nrowsB, &beta, &d_C, nrowsC, nMatricesA);
if (ret != CUBLAS_STATUS_SUCCESS)
{
printf("cublasSgemm returned error code %d, line(%d)\n", ret, __LINE__);
exit(EXIT_FAILURE);
}
ret = cublasDestroy(handle);
if (ret != CUBLAS_STATUS_SUCCESS)
{
printf("cublasCreate returned error code %d, line(%d)\n", ret, __LINE__);
exit(EXIT_FAILURE);
}
plhs[0] = mxGPUCreateMxArrayOnGPU(C);
mxGPUDestroyGPUArray(A);
mxGPUDestroyGPUArray(B);
mxGPUDestroyGPUArray(C);
}
I suspect the problem is related to the call to cublasSgemmBatched, because when I remove that call from the code I no longer get the error.
Any help would be very much appreciated!
Thanks !