i want to add 2 matrix by cuda programming in matlab mex file, and my program is:
#include “mex.h”
void convert_double2float( double *input_double, float *output_float,int Ntot)
{
int i;
for (i = 0; i < Ntot; i++)
{
output_float[i] = (float) input_double[i];
}
}
void convert_float2double( float *input_float, double *output_double,int Ntot)
{
int i;
for (i = 0; i < Ntot; i++)
{
output_double[i] = (double) input_float[i];
}
}
global void VecAdd(float* A, float* B, float* C)
{
int i = threadIdx.x;
C[i] = A[i] + B[i];
}
void mexFunction( int nlhs, mxArray *plhs, int nrhs, const mxArray *prhs)
{
float *A,*B,*C;
double *input_A,*input_B, *output_C;
int M,N;
M = mxGetM(prhs[0]);
N = mxGetN(prhs[0]);
mexPrintf(“\nm=%d , n=%d\n”,M,N);
/* Retrieve the input data */
input_A = (double *) mxGetData(prhs[0]);
input_B = (double *) mxGetData(prhs[1]);
A = (float*) mxMalloc(sizeof(float)*N*M);
B = (float*) mxMalloc(sizeof(float)*N*M);
C = (float*) mxMalloc(sizeof(float)*N*M);
convert_double2float(input_A, A, N*M);
convert_double2float(input_B, B, N*M);
int block_size=16;
dim3 dimBlock(block_size,block_size);
VecAdd<<<1, dimBlock>>>(A, B, C);
/* Create an mxArray for the output data */
plhs[0] = mxCreateDoubleMatrix(M, N, mxREAL);
/* Create a pointer to the output data */
output_C = mxGetPr(plhs[0]);
convert_float2double(C,output_C, N*M);
mxFree(A);
mxFree(B);
mxFree©;
return;
}
in execute:
nvmex -f nvmexopts.bat add.cu -IC:\cuda\include -LC:\cuda\lib -lcufft -lcudart
add.cu
tmpxft_00000a24_00000000-3_add.cudafe1.gpu
tmpxft_00000a24_00000000-8_add.cudafe2.gpu
tmpxft_00000a24_00000000-3_add.cudafe1.cpp
c=add(a,B);
External Image but it not correct answer, please help me!!!
:wacko: