Hi,
I am new to CUDA programming with linked lists and unified memory. My data structure looks like this:
// One node of a singly linked list; testkernel walks these via `next`.
struct Element {
    int      value;      // factor multiplied by inputV[yPosition] in testkernel
    int      yPosition;  // used by testkernel as an index into inputV
    Element *next;       // following node; testkernel stops when this is NULL
};
I have allocated memory for this array of structures in unified memory, and my kernel code looks like this:
/**
 * For each vertex `index`, walks adjacencyList[index] and writes to
 * output[index] either 0 (empty list) or
 *   1 + sum over nodes of node->value * inputV[node->yPosition].
 *
 * Launch layout: one thread per vertex; only the x dimension of the grid and
 * block is used to form the index. Threads with index >= *vertices do nothing.
 * No shared memory is used.
 *
 * Preconditions (not checked here):
 *  - adjacencyList, vertices, inputV, finalLevel and output must all be
 *    dereferenceable from the device.
 *  - Every Element reachable through adjacencyList[i] and ->next must also be
 *    device-accessible. A node that lives in ordinary host memory makes the
 *    `traverse->value` read fault with an illegal address.
 *  - Every node's yPosition must be a valid index into inputV.
 *
 * @param adjacencyList  per-vertex list heads; NULL marks an empty list
 * @param vertices       pointer to the number of vertices
 * @param inputV         values indexed by each node's yPosition
 * @param finalLevel     pointer to a flag/level; the kernel is a no-op unless > 0
 * @param output         per-vertex result, written at output[index]
 */
__global__ void testkernel( struct Element ** adjacencyList,
                            int * vertices,
                            int* inputV, int * finalLevel,int * output){
    int index = blockIdx.x*blockDim.x+threadIdx.x;
    // Logical && (not bitwise &): short-circuits and states the intent.
    if(*finalLevel>0 && index<*vertices){
        struct Element * traverse = adjacencyList[index];
        if(traverse==NULL){
            output[index]=0;
        }else{
            // Accumulator starts at 1, as in the original code.
            int ans=1;
            while (traverse != NULL) {
                // `ans` is private to this thread, so a plain add is correct.
                // atomicAdd is only defined for global/shared memory and must
                // not be pointed at a local variable.
                ans += (traverse->value )*(inputV[traverse->yPosition]);
                traverse = traverse->next;
            }
            output[index]=ans;
        }
    }
}
When I run my code, I encounter a CUDA error reporting an illegal memory access.
Next, I debugged it using cuda-gdb:
[Switching focus to CUDA kernel 0, grid 1, block (0,0,0), thread (0,0,0), device 0, sm 14, warp 0, lane 0]
Breakpoint 2, testkernel<<<(1,1,1),(6,1,1)>>> (adjacencyList=0x1100004000,
vertices=0x1100000000, inputV=0x1100005000, finalLevel=0x1100003000,
output=0x1100006000) at bfsLLSMVP.cu:36
36 int index = blockIdx.x*blockDim.x+threadIdx.x;
(cuda-gdb) step
38 if(*finalLevel>0 & index<*vertices){
(cuda-gdb) print index
$1 = 0
(cuda-gdb) print *vertices
$2 = 6
(cuda-gdb) step
40 struct Element * traverse = adjacencyList[index];
(cuda-gdb) print traverse
$3 = (@generic Element * @local) 0x0
(cuda-gdb) step
44 if(traverse==NULL){
(cuda-gdb) step
CUDA Exception: Device Illegal Address
The exception was triggered in device 0.
Program received signal CUDA_EXCEPTION_10, Device Illegal Address.
[Switching focus to CUDA kernel 0, grid 1, block (0,0,0), thread (3,0,0), device 0, sm 14, warp 0, lane 3]
0x0000000000a4cb68 in testkernel<<<(1,1,1),(6,1,1)>>> (
adjacencyList=0x1100004000, vertices=0x1100000000, inputV=0x1100005000,
finalLevel=0x1100003000, output=0x1100006000) at bfsLLSMVP.cu:52
52 int val = (traverse->value )*(inputV[traverse->yPosition]);
(cuda-gdb)
Can anybody tell me why this is happening? Can’t we use linked lists inside CUDA kernels?