using cublasSgemmBatched in mex

Hi,

I am trying to multiply multiple matrices from within MATLAB using cublasSgemmBatched in a MEX file.

My matlab code is very simple:

% Select GPU device 1.
gpuDevice(1);
% Two 3-D single-precision gpuArrays: 1500 pages of 400x10 and of 10x12.
% The option string must use plain ASCII single quotes; typographic quotes
% are a MATLAB syntax error.
a = single(rand(400,10,1500,'gpuArray'));
b = single(rand(10,12,1500,'gpuArray'));
% Call the MEX function; no trailing semicolon, so the result is displayed.
c = MatCuda(a,b)

I get the following error:

Error using gpuArray/subsref
An unexpected error occurred during CUDA execution. The CUDA error was:
unknown error

and here’s the mexFunction code:

void mexFunction( int nlhs, mxArray *plhs,
int nrhs, const mxArray *prhs){

char const * const errId = "parallel:gpu:mexGPUExample:InvalidInput";
char const * const errMsg = "Invalid input to MEX file.";

/* Declare all variables.*/
mxGPUArray const *A;
mxGPUArray const *B;
mxGPUArray *C;

/* Initialize the MathWorks GPU API. */
mxInitGPU();

/* Throw an error if the input is not a GPU array. */
if ((nrhs != 2) || !(mxIsGPUArray(prhs[0])) || !(mxIsGPUArray(prhs[1]))) {
	mexErrMsgIdAndTxt(errId, errMsg);
}

A = mxGPUCreateFromMxArray(prhs[0]);
B = mxGPUCreateFromMxArray(prhs[1]);

if ((mxGPUGetClassID(A) != mxSINGLE_CLASS) || (mxGPUGetClassID(B) != mxSINGLE_CLASS)) {
	mexErrMsgIdAndTxt(errId, errMsg);
}

float const *d_A;
float const *d_B;
d_A = (float const *)(mxGPUGetDataReadOnly(A));
d_B = (float const *)(mxGPUGetDataReadOnly(B));

const mwSize *dimsA = mxGPUGetDimensions(A);
size_t nrowsA = dimsA[0];
size_t ncolsA = dimsA[1];
size_t nMatricesA = dimsA[2];
mxFree((void*) dimsA);

const mwSize *dimsB = mxGPUGetDimensions(B);
size_t nrowsB = dimsB[0];
size_t ncolsB = dimsB[1];
size_t nMatricesB = dimsB[2];
mxFree((void*)dimsB);

size_t nrowsC = nrowsA;
size_t ncolsC = ncolsB;

mwSize dimsC[3] = { nrowsA, ncolsB, nMatricesB };
C = mxGPUCreateGPUArray(mxGPUGetNumberOfDimensions(A),
	dimsC,
	mxGPUGetClassID(A),
	mxGPUGetComplexity(A),
	MX_GPU_DO_NOT_INITIALIZE);

float *d_C;
d_C = (float *)(mxGPUGetData(C));

cublasHandle_t handle;
cublasStatus_t ret;
ret = cublasCreate(&handle);
if (ret != CUBLAS_STATUS_SUCCESS)
{
	printf("cublasCreate returned error code %d, line(%d)\n", ret, __LINE__);
	exit(EXIT_FAILURE);
}
const float alpha = 1.0f;
const float beta = 0.0f;
ret = cublasSgemmBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, nrowsA, ncolsB, ncolsA, &alpha, &d_A, nrowsA, &d_B, nrowsB, &beta, &d_C, nrowsC, nMatricesA);

if (ret != CUBLAS_STATUS_SUCCESS)
{
	printf("cublasSgemm returned error code %d, line(%d)\n", ret, __LINE__);
	exit(EXIT_FAILURE);
}

ret = cublasDestroy(handle);
if (ret != CUBLAS_STATUS_SUCCESS)
{
	printf("cublasCreate returned error code %d, line(%d)\n", ret, __LINE__);
	exit(EXIT_FAILURE);
}

plhs[0] = mxGPUCreateMxArrayOnGPU(C);
mxGPUDestroyGPUArray(A);
mxGPUDestroyGPUArray(B);
mxGPUDestroyGPUArray(C);

}

I suspect the problem is related to the call to cublasSgemmBatched, because the error disappears when I remove that call from the code.

Any help would be very much appreciated!
Thanks !

also cross-posted here:

http://stackoverflow.com/questions/33113849/error-using-cublassgemmbatched-in-mex