I have the following CUDA code, which I compile simply by running “$ nvcc test.cu”:
#include<stdio.h>
#include<stdlib.h>
#include<cuda_runtime.h>
/*
 * Small record used by this example: one int plus one pointer to int.
 * The pointer member holds only an address; whatever it points at has to be
 * allocated (and made reachable) separately by the code that fills it in.
 */
typedef struct someStruct
{
    int someInteger;   /* scalar member */
    int *somePointer;  /* address of the first element of an int array */
} someStruct;
/*
 * Allocate a someStruct on the host heap and fill it with example values.
 *
 * On success someInteger is 13 and somePointer refers to a freshly
 * malloc'ed array of 10 ints holding 10, 11, ..., 19.
 *
 * Returns the new struct, or NULL if either allocation fails; in the failure
 * case nothing is leaked. The caller releases somePointer and then the
 * struct itself with free().
 */
someStruct* start_struct()
{
    someStruct *myStruct = (someStruct*)malloc(sizeof(someStruct));
    if (!myStruct)
        return NULL;

    myStruct->somePointer = (int*)malloc(10 * sizeof(int));
    if (!myStruct->somePointer)
    {
        /* Without this check the fill loop below would write through NULL. */
        free(myStruct);
        return NULL;
    }

    myStruct->someInteger = 13;
    for (int i = 0; i < 10; i++)
        myStruct->somePointer[i] = i + 10;

    return myStruct;
}
/*
 * Debug kernel: each launched thread adds up somePointer[0..9] and then, for
 * every index 0..9, prints two lines:
 *   1. myStruct_d->someInteger, the thread's global id, the index and the sum;
 *   2. the value stored at somePointer[index].
 *
 * The global id is computed from the x dimension only (blockIdx.x, blockDim.x,
 * threadIdx.x). There is no per-thread bounds guard: every thread performs
 * the full loop.
 *
 * Both arguments are dereferenced in device code, so they must be addresses
 * the device can read. Only the someInteger member of the struct is read
 * here; the struct's own somePointer member is not dereferenced.
 */
__global__ void print_on_cuda(someStruct* myStruct_d, int* somePointer)
{
    const int globalThread = threadIdx.x + blockDim.x * blockIdx.x;

    /* Every thread computes the same total over the 10 input values. */
    int total = 0;
    int idx = 0;
    while (idx < 10)
    {
        total += somePointer[idx];
        ++idx;
    }

    for (idx = 0; idx != 10; ++idx)
    {
        printf("someInteger %d on thread %d with index = %d and sum = %d \n",
               myStruct_d->someInteger, globalThread, idx, total);
        /* Element is read from the separately passed array argument. */
        printf("Value at somePointer[%d] = %d\n", idx, somePointer[idx]);
    }
}
/* Print a readable message and exit when a CUDA runtime call did not succeed. */
static void check_cuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Build the example struct on the host, mirror it on the device, and launch
 * print_on_cuda three times with 2 blocks of 10 threads.
 *
 * The struct is uploaded as a deep copy: the int array goes into its own
 * device buffer (somePointer_d), and the struct image sent to the device has
 * its somePointer member rewritten to that device address. Copying the host
 * struct verbatim would leave a host heap address in the device struct, and
 * dereferencing it in a kernel is an illegal memory access.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if host allocation or any
 * CUDA call fails.
 */
int main(){
    /* Host-side struct with its 10-element array. */
    someStruct* myStruct = start_struct();
    if (!myStruct)
    {
        fprintf(stderr, "start_struct failed: out of host memory\n");
        return EXIT_FAILURE;
    }

    someStruct* myStruct_d = NULL;
    int* somePointer_d = NULL;
    check_cuda(cudaMalloc((void**)&myStruct_d, sizeof(someStruct)), "cudaMalloc(myStruct_d)");
    check_cuda(cudaMalloc((void**)&somePointer_d, 10*sizeof(int)), "cudaMalloc(somePointer_d)");

    /* Upload the array payload first so its device address is known. */
    check_cuda(cudaMemcpy(somePointer_d, myStruct->somePointer, 10*sizeof(int), cudaMemcpyHostToDevice),
               "cudaMemcpy(somePointer_d)");

    /* Stage a host copy whose pointer member holds the DEVICE address, then
       upload that image; the original host struct is left untouched so the
       free() calls below still see the host pointer. */
    someStruct staged = *myStruct;
    staged.somePointer = somePointer_d;
    check_cuda(cudaMemcpy(myStruct_d, &staged, sizeof(someStruct), cudaMemcpyHostToDevice),
               "cudaMemcpy(myStruct_d)");

    for(int counter = 0; counter < 3; counter++){
        print_on_cuda<<<2,10>>>(myStruct_d, somePointer_d);
        /* Launch-configuration errors are only reported via cudaGetLastError. */
        check_cuda(cudaGetLastError(), "print_on_cuda launch");
        /* Faults inside the kernel surface at the next synchronizing call;
           the sync also flushes the device-side printf output. */
        check_cuda(cudaDeviceSynchronize(), "print_on_cuda execution");
        printf("Counter = %d\n", counter);
    }

    check_cuda(cudaFree(myStruct_d), "cudaFree(myStruct_d)");
    check_cuda(cudaFree(somePointer_d), "cudaFree(somePointer_d)");
    free(myStruct->somePointer);
    free(myStruct);
    return 0;
}
Here, my objective is to access myStruct_d->somePointer[i] inside the kernel without needing to copy the array to somePointer_d, which is allocated separately with cudaMalloc. Notice that I can print myStruct_d->someInteger inside my kernel, but whenever I try to print myStruct_d->somePointer[i] directly, instead of going through the copy in somePointer_d, I get nothing. Literally nothing: the execution does not stop or give me a segmentation fault. It simply ignores the printf line that uses this variable, my computer freezes for a few seconds, and then the program finishes, printing the rest.
I should also say that I tried to use myStruct_d->somePointer to compute the variable sum inside the kernel, but the kernel cannot even access the values behind this pointer, so I get a similar problem.
I am looking for a way to fix this problem without having to pass every pointer to the kernel separately, because in my original application my struct is bigger and that would demand too many changes to my functions and parameters. I was looking at the function cudaMemcpyToSymbol, for instance, but I am not sure I understand what it does or whether it would solve my problem. I am open to any suggestions.
Thanks