Matlab mex file using cublas - problems

Oops - I see I posted this to the wrong forum; I’ve moved my question to the programming forum.

I am getting my feet wet with Cuda in attempting to write a mex file that will use the cublasSgemm routine to multiply two matrices. I have Suse linux 10.3 and Cuda 1.1, manipulated to work on the system (all examples compile and work, the Bretherton’s matlab fluid/fft example compiles and works). I don’t really know quite what I am doing; I stumble around quite a bit, but it would be nice to get the routine below working. It compiles o.k., and runs in matlab ok without crashing (a miracle), but it reports back the wrong answer - I suspect array dimensioning problems. If called from matlab as C=sgemm_cu(A,B), the answer for C comes back with the correct dimensions but filled with elements of B, and the A array has arbitrary elements (it’s supposed to be unchanged). I modified the matlab-example Makefile to work for this routine. I am testing it on small 3X3 matrices, but the intent is to use it on 4000X4000 sized matrices or so. I am stuck - and so ask for help.

I note that a script like this would be a simpler (perhaps more useful?) matlab-mex example than the impressive fluid mechanics example.

Here is the code; I’d be thrilled to receive hints that would get it working!

#include "mex.h"

#include "cublas.h"

#define IDX2C(i,j,ld) (((j)*(ld))+(i))   /* is something like this needed??? */

void convert_double2float( double *input_double, float *output_float,int Ntot)

{

    int i;

    for (i = 0; i < Ntot; i++)

    {

                output_float[i] = (float) input_double[i];

    }

}

void convert_float2double( float *input_float, double *output_double,int Ntot)

{

    int i;

    for (i = 0; i < Ntot; i++)

    {

                output_double[i] = (double) input_float[i];

    }

}

/*  sgemm_cu.cu  

  function [C]=sgemm(A,B)

*/

void mexFunction( int nlhs, mxArray *plhs[],

    int nrhs, const mxArray *prhs[])

{

      int M,K,L,N;

      int dims0[2];

      float alpha,beta;

      double *A,*B,*C;

      float *a,*b,*c;

      cublasStatus stat;

      float* devPtrA;

     /* char chn[3] = "N"; */

     if (nrhs != 2) {

          mexErrMsgTxt("sgemm requires 2 input arguments");

      } else if (nlhs != 1) {

          mexErrMsgTxt("sgemm requires 1 output argument");

      }

      

      alpha = 1.0;

      beta = 0.0;

   

  /*    printf("alpha=%f\n",alpha); 

      printf("beta=%f\n",beta);  */

     M = mxGetM(prhs[0]);   /* gets number of rows of A */

      K = mxGetN(prhs[0]);   /* gets number of columns of A */

      L = mxGetM(prhs[1]);   /* gets number of rows of B */

      N = mxGetN(prhs[1]);   /* gets number of columns of B */

      /* K and L should be the same... */

  /* printf("M=%i\n",M); 

      printf("K=%i\n",K); 

      printf("L=%i\n",L); 

      printf("N=%i\n",N);   */

      printf("");     /* program crashes without this print statement here (???)  */

  

/* Left hand side matrix set up */

      dims0[0]=M;

      dims0[1]=N;

      plhs[0] = mxCreateNumericArray(2,dims0,mxDOUBLE_CLASS,mxREAL);

      C = mxGetPr(plhs[0]);

      

/* Matrix 1 */

      A = mxGetPr(prhs[0]);

/* Matrix 2 */

      B = mxGetPr(prhs[1]);

     convert_double2float(A,a,M*K);

      convert_double2float(B,b,N*K);

     cublasInit();

      stat = cublasAlloc (25, sizeof(*a), (void**)&devPtrA);    /* I haven't a clue...help? */

    /* dgemm_(chn,chn,&M,&N,&K,&alpha,A,&M,B,&L,&beta,C,&M);  */   

      cublasSgemm ('n','n',M,N,K,alpha,a,M,b,L,beta,c,M); 

     cublasFree (devPtrA);

      cublasShutdown(); 

     convert_float2double(c,C,M*N);

}