Accessing pointer values inside struct copied to CUDA device

I found two solutions to my problem. One is to use Unified Memory via the function cudaMallocManaged() (as mentioned in cuda - Unified memory and struct with arrays - Stack Overflow). The code is as follows and executes as I intended:

#include<stdio.h>
#include<stdlib.h>
#include<cuda_runtime.h>

struct someStruct
{
  int someInteger;
  int *somePointer;
};

typedef struct someStruct someStruct;

someStruct* start_struct()
{
  someStruct *myStruct = (someStruct*)malloc(sizeof(someStruct));
  if(myStruct)
    {
      myStruct->somePointer = (int*)malloc(10*sizeof(int));
      myStruct->someInteger = 13;
      
      for(int i = 0;i<10;i++)
        myStruct->somePointer[i] = i+10;
    }
  return myStruct;
}

__global__ void print_on_cuda(someStruct* myStruct_d, int* somePointer)
{
  int threadId = blockIdx.x * blockDim.x + threadIdx.x;
  int sum = 0;
  for(int i = 0; i<10; i++)
    sum += somePointer[i];
  for(int i = 0; i<10; i++){
    printf("someInteger %d on thread %d with index = %d and sum = %d \n",myStruct_d->someInteger, threadId, i, sum/*, myStruct_d->somePointer[i]*/);
    // Print the value of myStruct_d->somePointer[i] for debugging
    printf("Value at somePointer_d[%d] = %d and myStruct_d->somePointer[%d] = %d\n", i, somePointer[i], i, myStruct_d->somePointer[i]);
  }
  return;
}

int main(){
  // myStruct is initialized on CPU
  someStruct* myStruct;
  myStruct = start_struct();

  // Device variables
  int* somePointer_d;
  
  // Allocating and copying to some test pointer
  cudaMalloc((void**)&somePointer_d, 10*sizeof(int));
  cudaMemcpy(somePointer_d, myStruct->somePointer, 10*sizeof(int), cudaMemcpyHostToDevice);

  // Using Unified Memory property
  someStruct* structTest;
  cudaMallocManaged(&structTest,sizeof(someStruct));

  structTest->somePointer = somePointer_d;
  structTest->someInteger = myStruct->someInteger;

  for(int counter = 0; counter < 3; counter++){
    print_on_cuda<<<2,10>>>(structTest, somePointer_d);
    cudaDeviceSynchronize();
    printf("Counter = %d\n", counter);
  }
  
  cudaFree(somePointer_d);
  cudaFree(structTest);   // structTest was allocated with cudaMallocManaged, so it is freed with cudaFree
  free(myStruct->somePointer);
  free(myStruct);
  return 0;
}
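
For completeness, the inner array can also be allocated with cudaMallocManaged(), so that both the struct and the data it points to are visible from host and device without any cudaMemcpy(). This is only a sketch of that variant (it reuses someStruct and the print_on_cuda kernel from the listing above; structManaged and dataManaged are just names I picked):

  // Allocate both the struct and its array in managed memory.
  someStruct *structManaged;
  int *dataManaged;
  cudaMallocManaged(&structManaged, sizeof(someStruct));
  cudaMallocManaged(&dataManaged, 10*sizeof(int));

  // The managed array can be filled directly from the host.
  for(int i = 0; i < 10; i++)
    dataManaged[i] = i + 10;

  structManaged->someInteger = 13;
  structManaged->somePointer = dataManaged;   // managed pointer, valid on host and device

  // The kernel can now dereference structManaged->somePointer directly.
  print_on_cuda<<<2,10>>>(structManaged, dataManaged);
  cudaDeviceSynchronize();

  cudaFree(dataManaged);
  cudaFree(structManaged);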

The other one performs a deep copy (CUDA : How to allocate memory for data member of a class - Stack Overflow, or even more explicitly stated in Copying a struct containing pointers to CUDA device - Stack Overflow): the struct's pointer member is set to a separate device allocation, and the struct itself is then passed to the kernel by value. The code is as follows:

#include<stdio.h>
#include<stdlib.h>
#include<cuda_runtime.h>

struct someStruct
{
  int someInteger;
  int *somePointer;
};

typedef struct someStruct someStruct;

someStruct* start_struct()
{
  someStruct *myStruct = (someStruct*)malloc(sizeof(someStruct));
  if(myStruct)
    {
      myStruct->somePointer = (int*)malloc(10*sizeof(int));
      myStruct->someInteger = 13;
      
      for(int i = 0;i<10;i++)
        myStruct->somePointer[i] = i+10;
    }
  return myStruct;
}

__global__ void print_on_cuda(someStruct myStruct_d, int* somePointer)
{
  int threadId = blockIdx.x * blockDim.x + threadIdx.x;
  int sum = 0;
  for(int i = 0; i<10; i++)
    sum += somePointer[i];
  for(int i = 0; i<10; i++){
    printf("someInteger %d on thread %d with index = %d and sum = %d \n",myStruct_d.someInteger, threadId, i, sum);
    // Print the value of myStruct_d.somePointer[i] for debugging
    printf("Value at somePointer_d[%d] = %d and myStruct_d.somePointer[%d] = %d\n", i, somePointer[i], i, myStruct_d.somePointer[i]);
  }
  return;
}

int main(){
  // myStruct is initialized on CPU
  someStruct* myStruct;
  myStruct = start_struct();

  // Device variables
  someStruct myStruct_d;
  int* somePointer_d;
  
  // Allocating and copying to some test pointer
  cudaMalloc((void**)&somePointer_d, 10*sizeof(int));
  cudaMemcpy(somePointer_d, myStruct->somePointer, 10*sizeof(int), cudaMemcpyHostToDevice);

  myStruct_d.somePointer = somePointer_d;
  myStruct_d.someInteger = myStruct->someInteger;

  for(int counter = 0; counter < 3; counter++){
    print_on_cuda<<<2,10>>>(myStruct_d, somePointer_d);
    cudaDeviceSynchronize();
    printf("Counter = %d\n", counter);
  }
  
  cudaFree(somePointer_d);
  free(myStruct->somePointer);
  free(myStruct);
  return 0;
}
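
For reference, the second Stack Overflow link above (Copying a struct containing pointers to CUDA device) also shows a variant in which the struct itself lives in device memory: the device pointer is patched into a host-side staging copy of the struct, and that copy is then cudaMemcpy'd into a cudaMalloc'd struct. A sketch of that idea (staging and struct_d are names I made up; the kernel would have to take a someStruct* parameter, as in the first listing):

  // Deep copy with the struct itself in device memory.
  int *somePointer_d;
  cudaMalloc((void**)&somePointer_d, 10*sizeof(int));
  cudaMemcpy(somePointer_d, myStruct->somePointer, 10*sizeof(int), cudaMemcpyHostToDevice);

  // Host-side staging copy whose pointer member already points to device memory.
  someStruct staging;
  staging.someInteger = myStruct->someInteger;
  staging.somePointer = somePointer_d;

  // Copy the patched struct to its own device allocation.
  someStruct *struct_d;
  cudaMalloc((void**)&struct_d, sizeof(someStruct));
  cudaMemcpy(struct_d, &staging, sizeof(someStruct), cudaMemcpyHostToDevice);

  // A kernel taking someStruct* (as in the first listing) could then be launched as:
  // print_on_cuda<<<2,10>>>(struct_d, somePointer_d);

  cudaFree(struct_d);
  cudaFree(somePointer_d);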

According to some references online, these two versions might differ in performance; the Unified Memory version tends to be a little less performant. Also notice that the kernel-call syntax changes between the two versions, because structTest is declared as a pointer while myStruct_d is not.
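
On the performance point, the overhead of Unified Memory demand paging can sometimes be reduced by prefetching the managed allocation to the GPU before the launch (cudaMemPrefetchAsync, available since CUDA 8). This is only a sketch, assuming structTest from the first listing, device 0, and a GPU/driver that supports the call:

  // Prefetch the managed struct to device 0 so the kernel does not page-fault on first access.
  int device = 0;
  cudaMemPrefetchAsync(structTest, sizeof(someStruct), device, 0);
  print_on_cuda<<<2,10>>>(structTest, somePointer_d);
  cudaDeviceSynchronize();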

Other references: Clean way of copying a struct with pointers to the GPU, https://developer.nvidia.com/blog/unified-memory-in-cuda-6/, and Help me understand cuda memory management - why is cudaMemcpy (not) needed?

Also, I am compiling both codes just with “$ nvcc myCudaFile.cu”. That bare invocation might be hiding errors or warnings that would only appear once more flags are added to the compilation process.
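
Passing host-compiler warnings through nvcc (for example “$ nvcc -Xcompiler -Wall myCudaFile.cu”) helps with the compile-time side, and checking the return codes of the runtime calls helps with silent runtime failures. A minimal error-checking macro sketch (CUDA_CHECK is just a name I picked, not something from the toolkit):

#include<stdio.h>
#include<cuda_runtime.h>

// Wrap every CUDA runtime call so failures are reported instead of silently ignored.
#define CUDA_CHECK(call)                                              \
  do {                                                                \
    cudaError_t err = (call);                                         \
    if(err != cudaSuccess)                                            \
      fprintf(stderr, "CUDA error %s at %s:%d\n",                     \
              cudaGetErrorString(err), __FILE__, __LINE__);           \
  } while(0)

// Usage, for example:
//   CUDA_CHECK(cudaMalloc((void**)&somePointer_d, 10*sizeof(int)));
//   CUDA_CHECK(cudaDeviceSynchronize());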