Hi,
I'm trying to use OpenMP in CUDA code, which I then compile to a MEX file and run in MATLAB. However, only one thread appears to do any work.
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <stdio.h>
#include "cuda.h"
#include "mex.h"
#include "omp.h"

/*
 * MEX gateway for the OpenMP smoke test.
 *
 * Returns a 1x6 int32 row vector in plhs[0]. Every OpenMP thread that takes
 * part in the parallel region writes (thread id + 10) into the slot indexed
 * by its own thread id, so a fully parallel run yields 10 11 12 13 14 15.
 * Slots belonging to threads that never ran keep the zero written by
 * mxCreateNumericMatrix.
 *
 * Parameters:
 *   nlhs  number of requested outputs (not inspected)
 *   plhs  output array; plhs[0] receives the result vector
 *   nrhs  number of inputs (not inspected)
 *   prhs  input array (not inspected)
 *
 * Build note: the `#pragma omp` below only takes effect when the host
 * compiler is actually given its OpenMP flag. With nvcc that flag has to be
 * forwarded to the host compiler (e.g. `-Xcompiler -fopenmp` for GCC);
 * otherwise the pragma is ignored, the region runs on one thread, and the
 * result is 10 0 0 0 0 0.
 */
void mexFunction(int nlhs, mxArray* plhs[],
                 int nrhs, const mxArray* prhs[])
{
    /* Single source of truth for both the output width and the team size. */
    const int kNumThreads = 6;

    (void)nlhs;
    (void)nrhs;
    (void)prhs;

    plhs[0] = mxCreateNumericMatrix(1, kNumThreads, mxINT32_CLASS, mxREAL);
    int* Output = (int*)mxGetData(plhs[0]);

#ifndef _OPENMP
    /* _OPENMP is only defined when the compiler has OpenMP enabled; without
     * it the pragma below is dropped and the block runs serially. */
    mexWarnMsgIdAndTxt("OpenMPTest:openmpDisabled",
                       "Compiled without OpenMP support; the parallel region "
                       "will run on a single thread. Forward the OpenMP flag "
                       "to the host compiler (e.g. -Xcompiler -fopenmp).");
#endif

    /* num_threads() scopes the request to this region instead of changing
     * the default team size for later parallel regions on this thread. */
    #pragma omp parallel num_threads(kNumThreads)
    {
        const int ID = omp_get_thread_num();
        /* The runtime may supply fewer threads than requested, never more;
         * the check keeps the write in bounds regardless. */
        if (ID < kNumThreads) {
            Output[ID] = ID + 10;
        }
    }
}
So, I expect to see 10 11 12 13 14 15 (maybe in another order), but I get 10 0 0 0 0 0 in MATLAB, which does not make sense. I compile my code with:
Flags=['CXXFLAGS="$CXXFLAGS -std=c++14 -O3 -fopenmp" ', 'LDFLAGS="$LDFLAGS -fopenmp"'];
mexcuda('-R2018a','OpenMPTest.cu', Flags );
OpenMPTest.cu (470 Bytes)
Could you please help me figure out what I'm missing?