Hey guys. I’m currently working on a CUDA project which consists of 2 basic kernels, who
work on 3 one-dimensional unsigned int arrays device_Ca, device_Ua, device_Ma. The problem is
that simple operations on them won’t work properly. The code of the program is as follows:
global void CUDA_KERNEL1(unsigned int V, unsigned int* device_Va, unsigned int E,
unsigned int* device_Ea, unsigned int* device_Wa, unsigned int* device_Ma,
unsigned int* device_Ca, unsigned int* device_Ua) {
unsigned int nid, start, end, i;
unsigned int tid = threadIdx.x;
if(tid<V) {
if(device_Ma[tid] == TRUE) {
device_Ma[tid] = FALSE;
//for all neighbors nid of tid
if((tid+1)<V) {
start = device_Va[tid];
end = device_Va[tid+1] - 1;
}else {
start = device_Va[tid];
end = E - 1;
}
for(i = start; i <= end; i++) {
nid = device_Ea[i];
if(device_Ua[nid] > (device_Ca[tid] + device_Wa[nid])) {
device_Ua[nid] = device_Ca[tid] + device_Wa[nid];
}
}
}
}
}
global void CUDA_KERNEL2(unsigned int V, unsigned int* device_Va, unsigned int E,
unsigned int* device_Ea, unsigned int* device_Wa, unsigned int* device_Ma,
unsigned int* device_Ca, unsigned int* device_Ua) {
unsigned int tid = threadIdx.x;
if(tid < V) {
if(device_Ca[tid] > device_Ua[tid]) {
device_Ca[tid] = device_Ua[tid];
device_Ma[tid] = TRUE;
}
}
}
and the call of the two kernels in my main program is:
CUDA_KERNEL1<<<1, V>>>(V, device_Va, E, device_Ea, device_Wa,
device_Ma, device_Ca, device_Ua);
CUDA_KERNEL2<<<1, V>>>(V, device_Va, E, device_Ea, device_Wa,
device_Ma, device_Ca, device_Ua);
But they don’t seem to work at all. I checked the initiate values of the arrays and they are properly
initialized. Can anyone make a suggestion?
Thanks in advance