I am trying to use CUDA to create mex files for use in Matlab. I found a white paper called Accelerating Matlab with CUDA using MEX Files at this link I am sure many people have seen.
http://developer.nvidia.com/object/matlab_cuda.html
I am trying the square_me.c example that starts on page 5. I compiled using the nvmexopts.bat and nvmex.pl I found from this site people have used sucessfully. My configureation is a little different I am using Windows XP64 bit, Matlab 2009a, MS Stuido 2005 (V8). One of the lines in the nvmexopts.bat is
set WINSDKDIR=C:\Program Files\Microsoft SDKs\Windows\V6.0A
I do not have that directory and don’t know what it is really using there. I think this may be a probem for me. I have added the files I am using and below are the error messages I am getting from Matlab.
Below is the codebox of the error I am getting in Matlab 2009a.
[codebox]
mex -v -f nvmexopts.bat square_cuda.c -IC:\cuda\include64 -LC:\cuda\lib64 -lcufft -lcudart
This is mex, Copyright 1984-2007 The MathWorks, Inc.
→ Options file specified on command line
→ Options file = NVMEXO~1.BAT
MATLAB = C:\PROGRA~1\MATLAB\R2009A
→ COMPILER = nvcc
→ Compiler flags:
COMPFLAGS = -c -Xcompiler "/c /Zp8 /GR /W3 /EHs /D_CRT_SECURE_NO_DEPRECATE /D_SCL_SECURE_NO_DEPRECATE /D_SECURE_SCL=0 /DMATLAB_MEX_FILE /nologo /MD"
OPTIMFLAGS = -Xcompiler "/O2 /Oy- /DNDEBUG"
DEBUGFLAGS = -Xcompiler "/Zi /Fd"square_cuda.mexw64.pdb""
arguments = -IC:\cuda\include64
Name switch =
→ Pre-linking commands =
→ LINKER = link
→ Link directives:
LINKFLAGS = /dll /export:mexFunction /MAP /LIBPATH:"C:\PROGRA~1\MATLAB\R2009A\extern\lib\win64\microsoft" libmx.lib libmex.lib libmat.lib /implib:C:\DOCUME~1\HOSKIN~1\LOCALS~1\TEMP\MEX_AQ~1\templib.x /MACHINE:X64 kernel64.lib user64.lib gdi64.lib winspool.lib comdlg64.lib advapi64.lib shell64.lib ole64.lib oleaut64.lib uuid.lib odbc64.lib odbccp64.lib
LINKDEBUGFLAGS = /DEBUG /PDB:"square_cuda.mexw64.pdb"
LINKFLAGSPOST =
Name directive = /out:"square_cuda.mexw64"
File link directive =
Lib. link directive =
Rsp file indicator = @
→ Resource Compiler = rc /fo “mexversion.res”
→ Resource Linker =
→ nvcc -IC:\cuda\include64 -c -Xcompiler “/c /Zp8 /GR /W3 /EHs /D_CRT_SECURE_NO_DEPRECATE /D_SCL_SECURE_NO_DEPRECATE /D_SECURE_SCL=0 /DMATLAB_MEX_FILE /nologo /MD” -IC:\PROGRA~1\MATLAB\R2009A\extern\include -Xcompiler “/O2 /Oy- /DNDEBUG” -DMX_COMPAT_32 square_cuda.c
square_cuda.c
square_cuda.c(8) : error C2065: ‘blockIdx’ : undeclared identifier
square_cuda.c(8) : error C2224: left of ‘.x’ must have struct/union type
square_cuda.c(8) : error C2065: ‘blockDim’ : undeclared identifier
square_cuda.c(8) : error C2224: left of ‘.x’ must have struct/union type
square_cuda.c(8) : error C2065: ‘threadIdx’ : undeclared identifier
square_cuda.c(8) : error C2224: left of ‘.x’ must have struct/union type
square_cuda.c(27) : warning C4267: ‘=’ : conversion from ‘size_t’ to ‘int’, possible loss of data
square_cuda.c(28) : warning C4267: ‘=’ : conversion from ‘size_t’ to ‘int’, possible loss of data
square_cuda.c(62) : error C2275: ‘dim3’ : illegal use of this type as an expression
c:\cuda\include\vector_types.h(480) : see declaration of 'dim3'
square_cuda.c(62) : error C2146: syntax error : missing ‘;’ before identifier ‘dimBlock’
square_cuda.c(62) : warning C4013: ‘dimBlock’ undefined; assuming extern returning int
square_cuda.c(63) : error C2275: ‘dim3’ : illegal use of this type as an expression
c:\cuda\include\vector_types.h(480) : see declaration of 'dim3'
square_cuda.c(63) : error C2146: syntax error : missing ‘;’ before identifier ‘dimGrid’
square_cuda.c(63) : warning C4013: ‘dimGrid’ undefined; assuming extern returning int
square_cuda.c(63) : error C2224: left of ‘.x’ must have struct/union type
square_cuda.c(65) : error C2224: left of ‘.x’ must have struct/union type
square_cuda.c(68) : error C2059: syntax error : ‘<’
C:\PROGRA~1\MATLAB\R2009A\BIN\MEX.PL: Error: Compile of ‘square_cuda.c’ failed.
??? Error using ==> mex at 218
Unable to complete successfully.
[/codebox]
Below is the code for nvmexopts.bat.
[codebox]@echo off
rem MSVC90OPTS.BAT
rem
rem Compile and link options used for building MEX-files
rem using the Microsoft Visual C++ compiler version 9.0
rem
rem StorageVersion: 1.0
rem C++keyFileName: MSVC90OPTS.BAT
rem C++keyName: Microsoft Visual C++ 2008
rem C++keyManufacturer: Microsoft
rem C++keyVersion: 9.0
rem C++keyLanguage: C++
rem
rem $Revision: 1.1.10.6 $ $Date: 2007/11/07 17:44:06 $
rem Copyright 1984-2007 The MathWorks, Inc.
rem
rem ************************************************************
rem General parameters
rem ************************************************************
set MATLAB=%MATLAB%
set VS90COMNTOOLS=%VS90COMNTOOLS%
set VSINSTALLDIR=C:\Program Files (x86)\Microsoft Visual Studio 8
set VCINSTALLDIR=%VSINSTALLDIR%\VC
set WINSDKDIR=C:\Program Files\Microsoft SDKs\Windows\V6.0A
set PATH=%VCINSTALLDIR%\BIN;%WINSDKDIR%\bin;%VSINSTALLDIR%\Common7\IDE;%VSINSTALLDIR%\SDK\v2.0\bin;%VSINSTALLDIR%\Common7\Tools;%VSINSTALLDIR%\Common7\Tools\bin;%VCINSTALLDIR%\VCPackages;%MATLAB_BIN%;%PATH%;C:\Program Files (x86)\Microsoft Visual Studio 8\VC\bin
set INCLUDE=%VCINSTALLDIR%\ATLMFC\INCLUDE;%VCINSTALLDIR%\INCLUDE;%WINSDKDIR%\INCLUDE;%VSINSTALLDIR%\SDK\v2.0\include;%INCLUDE%
set LIB=%VCINSTALLDIR%\ATLMFC\LIB\AMD64;%WINSDKDIR%\lib\x64;%VSINSTALLDIR%\SDK\v2.0\lib\AMD64;%MATLAB%\extern\lib\win64;%LIB%;%VCINSTALLDIR%\LIB\amd64
set MW_TARGET_ARCH=win64
rem ************************************************************
rem Compiler parameters
rem ************************************************************
rem set COMPILER=cl
set COMPILER=nvcc
set COMPFLAGS= -c -Xcompiler “/c /Zp8 /GR /W3 /EHs /D_CRT_SECURE_NO_DEPRECATE /D_SCL_SECURE_NO_DEPRECATE /D_SECURE_SCL=0 /DMATLAB_MEX_FILE /nologo /MD”
set OPTIMFLAGS=-Xcompiler “/O2 /Oy- /DNDEBUG”
set DEBUGFLAGS=-Xcompiler “/Zi /Fd”%OUTDIR%%MEX_NAME%%MEX_EXT%.pdb""
set NAME_OBJECT=
rem set NAME_OBJECT=/Fo
rem ************************************************************
rem Linker parameters
rem ************************************************************
set LIBLOC=%MATLAB%\extern\lib\win64\microsoft
set LINKER=link
set LINKFLAGS=/dll /export:%ENTRYPOINT% /MAP /LIBPATH:“%LIBLOC%” libmx.lib libmex.lib libmat.lib /implib:%LIB_NAME%.x /MACHINE:X64 kernel64.lib user64.lib gdi64.lib winspool.lib comdlg64.lib advapi64.lib shell64.lib ole64.lib oleaut64.lib uuid.lib odbc64.lib odbccp64.lib
set LINKOPTIMFLAGS=
set LINKDEBUGFLAGS=/DEBUG /PDB:“%OUTDIR%%MEX_NAME%%MEX_EXT%.pdb”
set LINK_FILE=
set LINK_LIB=
set NAME_OUTPUT=/out:“%OUTDIR%%MEX_NAME%%MEX_EXT%”
set RSP_FILE_INDICATOR=@
rem ************************************************************
rem Resource compiler parameters
rem ************************************************************
set RC_COMPILER=rc /fo “%OUTDIR%mexversion.res”
set RC_LINKER=
set POSTLINK_CMDS=del “%OUTDIR%%MEX_NAME%.map”
set POSTLINK_CMDS1=del %LIB_NAME%.x
set POSTLINK_CMDS2=mt -outputresource:“%OUTDIR%%MEX_NAME%%MEX_EXT%”;2 -manifest “%OUTDIR%%MEX_NAME%%MEX_EXT%.manifest”
set POSTLINK_CMDS3=del “%OUTDIR%%MEX_NAME%%MEX_EXT%.manifest”
[/codebox]
Below is the square_me.c code that is pulled from the white paper Accelerating MATLAB with CUDA Using MEX Files.
[codebox]#include “cuda.h”
#include “cuda_runtime.h”
#include “mex.h”
/* Kernel to square elements of the array on the GPU */
global void square_elements(float* in, float* out, int N)
{
int idx = blockIdx.x*blockDim.x+threadIdx.x;
if ( idx < N)
out[idx]=in[idx]*in[idx];
}
/* Gateway function */
void mexFunction(int nlhs, mxArray *plhs,
int nrhs, const mxArray *prhs[])
{
int i, j, m, n;
double *data1, *data2;
float *data1f, *data2f;
float *data1f_gpu, *data2f_gpu;
mxClassID category;
if (nrhs != nlhs)
mexErrMsgTxt("The number of input and output arguments must be the same.");
for (i = 0; i < nrhs; i++)
{
/* Find the dimensions of the data */
m = mxGetM(prhs[i]);
n = mxGetN(prhs[i]);
/* Create an mxArray for the output data */
plhs[i] = mxCreateDoubleMatrix(m, n, mxREAL);
/* Create an input and output data array on the GPU*/
cudaMalloc( (void **) &data1f_gpu,sizeof(float)*m*n);
cudaMalloc( (void **) &data2f_gpu,sizeof(float)*m*n);
/* Retrieve the input data */
data1 = mxGetPr(prhs[i]);
/* Check if the input array is single or double precision */
category = mxGetClassID(prhs[i]);
if( category == mxSINGLE_CLASS)
{
/* The input array is single precision, it can be sent directly to the card */
cudaMemcpy( data1f_gpu, data1, sizeof(float)*m*n, cudaMemcpyHostToDevice);
}
if( category == mxDOUBLE_CLASS)
{
/* The input array is in double precision, it needs to be converted to floats before being sent to the card */
data1f = (float *) mxMalloc(sizeof(float)*m*n);
for (j = 0; j < m*n; j++)
{
data1f[j] = (float) data1[j];
}
cudaMemcpy( data1f_gpu, data1f, sizeof(float)*n*m, cudaMemcpyHostToDevice);
}
data2f = (float *) mxMalloc(sizeof(float)*m*n);
/* Compute execution configuration using 128 threads per block */
dim3 dimBlock(128);
dim3 dimGrid((m*n)/dimBlock.x);
if ( (n*m) % 128 !=0 )
dimGrid.x+=1;
/* Call function on GPU */
square_elements<<<dimGrid,dimBlock>>>(data1f_gpu, data2f_gpu, n*m);
/* Copy result back to host */
cudaMemcpy( data2f, data2f_gpu, sizeof(float)*n*m, cudaMemcpyDeviceToHost); //error here hope this is right
/* Create a pointer to the output data */
data2 = mxGetPr(plhs[i]);
/* Convert from single to double before returning */
for (j = 0; j < m*n; j++)
{
data2[j] = (double) data2f[j];
}
/* Clean-up memory on device and host */
mxFree(data1f);
mxFree(data2f);
cudaFree(data1f_gpu);
cudaFree(data2f_gpu);
}
}[/codebox]