I just started learning about programming in CUDA. According to what I have learnt so far, in order to access variables from the host(CPU), the device(GPU) pointers need to be copied to the host memory using the function -
cudaMemcpy(&h_c, d_c, sizeof(int), cudaMemcpyDeviceToHost);
where h_c is host variable, and *d_c is a device pointer.
However, since in Jetson Xavier AGX the GPU and the CPU use the same memory is there a way that we can avoid the copying and access the variables directly?
If yes, an example would be extremely helpful.
I tried executing the following -
#include <iostream>
#include <cuda.h>
#include <cuda_runtime.h>
__global__ void gpuAdd(int d_a, int d_b, int *d_c)
{
//This code will be executed on the device instead of the host.
*d_c = d_a + d_b;
}
int main(void)
{
//Defining device pointer
int *d_c;
//Allocating memory for device pointer
cudaMalloc((void**)&d_c, sizeof(int));
//Kernel call by passing 1 and 4 as inputs and storing answer in d_c
//<< <1,1> >> means 1 block is executed with 1 thread per block
gpuAdd << <1, 1 >> > (1, 4, d_c);
printf("1 + 4 = %d\n", *d_c);
//Free up memory
cudaFree(d_c);
return 0;
}
On executing it gives the following error -
Which means that the memory is out of bounds for the host.