Oops - I see I posted this to the wrong forum; I’ve moved my question to the programming forum.
I am getting my feet wet with Cuda in attempting to write a mex file that will use the cublasSgemm routine to multiply two matrices. I have Suse linux 10.3 and Cuda 1.1, manipulated to work on the system (all examples compile and work, the Bretherton’s matlab fluid/fft example compiles and works). I don’t really know quite what I am doing; I stumble around quite a bit, but it would be nice to get the routine below working. It compiles o.k., and runs in matlab ok without crashing (a miracle), but it reports back the wrong answer - I suspect array dimensioning problems. If called from matlab as C=sgemm_cu(A,B), the answer for C comes back with the correct dimensions but filled with elements of B, and the A array has arbitrary elements (it’s supposed to be unchanged). I modified the matlab-example Makefile to work for this routine. I am testing it on small 3X3 matrices, but the intent is to use it on 4000X4000 sized matrices or so. I am stuck - and so ask for help.
I note that a script like this would be a simpler (perhaps more useful?) matlab-mex example than the impressive fluid mechanics example.
Here is the code; I’d be thrilled to receive hints that would get it working!
#include "mex.h"
#include "cublas.h"
#define IDX2C(i,j,ld) (((j)*(ld))+(i)) /* is something like this needed??? */
void convert_double2float( double *input_double, float *output_float,int Ntot)
{
int i;
for (i = 0; i < Ntot; i++)
{
output_float[i] = (float) input_double[i];
}
}
void convert_float2double( float *input_float, double *output_double,int Ntot)
{
int i;
for (i = 0; i < Ntot; i++)
{
output_double[i] = (double) input_float[i];
}
}
/* sgemm_cu.cu
function [C]=sgemm(A,B)
*/
void mexFunction( int nlhs, mxArray *plhs[],
int nrhs, const mxArray *prhs[])
{
int M,K,L,N;
int dims0[2];
float alpha,beta;
double *A,*B,*C;
float *a,*b,*c;
cublasStatus stat;
float* devPtrA;
/* char chn[3] = "N"; */
if (nrhs != 2) {
mexErrMsgTxt("sgemm requires 2 input arguments");
} else if (nlhs != 1) {
mexErrMsgTxt("sgemm requires 1 output argument");
}
alpha = 1.0;
beta = 0.0;
/* printf("alpha=%f\n",alpha);
printf("beta=%f\n",beta); */
M = mxGetM(prhs[0]); /* gets number of rows of A */
K = mxGetN(prhs[0]); /* gets number of columns of A */
L = mxGetM(prhs[1]); /* gets number of rows of B */
N = mxGetN(prhs[1]); /* gets number of columns of B */
/* K and L should be the same... */
/* printf("M=%i\n",M);
printf("K=%i\n",K);
printf("L=%i\n",L);
printf("N=%i\n",N); */
printf(""); /* program crashes without this print statement here (???) */
/* Left hand side matrix set up */
dims0[0]=M;
dims0[1]=N;
plhs[0] = mxCreateNumericArray(2,dims0,mxDOUBLE_CLASS,mxREAL);
C = mxGetPr(plhs[0]);
/* Matrix 1 */
A = mxGetPr(prhs[0]);
/* Matrix 2 */
B = mxGetPr(prhs[1]);
convert_double2float(A,a,M*K);
convert_double2float(B,b,N*K);
cublasInit();
stat = cublasAlloc (25, sizeof(*a), (void**)&devPtrA); /* I haven't a clue...help? */
/* dgemm_(chn,chn,&M,&N,&K,&alpha,A,&M,B,&L,&beta,C,&M); */
cublasSgemm ('n','n',M,N,K,alpha,a,M,b,L,beta,c,M);
cublasFree (devPtrA);
cublasShutdown();
convert_float2double(c,C,M*N);
}