Hi, my question is quite simple.
// Host buffer with room for `size` floats; its contents are not initialized in this snippet.
float * p = (float*) malloc(size * sizeof(float));
// Device-side pointer; cudaMalloc below stores the address of the device allocation in it.
float * dev_p;
// NOTE(review): the return codes of cudaMalloc and cudaMemcpy are not checked here.
cudaMalloc( (void**) &dev_p, size * sizeof(float) );
// Copy all `size` floats from the host buffer into the device allocation.
cudaMemcpy( dev_p, p, size * sizeof(float), cudaMemcpyHostToDevice );
Now, if I have a kernel like this:
// Kernel that receives a float pointer `p` as its only argument.
// The body is intentionally left empty in this example; only the signature
// matters for the question. The `__global__` qualifier is what marks the
// function as a kernel that can be launched with the <<<...>>> syntax.
__global__ void kernel_foo (float* p){
}
Can I call it this way from the host?
int offset = 2; //for example
// `dev_p + offset` is ordinary float* arithmetic: it yields the address `offset`
// floats past dev_p. The pointer is only passed to the kernel, not dereferenced here.
kernel_foo<<<dimGrid, dimBlocks>>> (dev_p + offset);
Can I use pointer arithmetic on the host with a “device pointer”?