Hello,
I am having a lot of difficulty copying an array from the device back to the host and reading it. When I read the data that should have been returned, all I get is junk. Could anyone take a look at my code snippets and tell me what I’m doing wrong? Thank you very much!
/* One framebuffer element: a pair of 32-bit unsigned integers. */
struct intss {
    u_int32_t one;  /* receives the packed colour value written by the kernel */
    u_int32_t two;  /* second word; not written by any code visible here */
};
/*
 * Host driver.
 *
 * Allocates a framebuffer of block_size*grid_size intss elements on the host
 * and on the device, launches render2 to fill the device copy, copies it back
 * and prints the `one` field of every element.
 *
 * Returns 0 on success and 1 if the host allocation or any CUDA call fails.
 * Every CUDA return code is checked: without the checks a failed launch or
 * copy goes unnoticed and host_fb still holds whatever malloc() returned,
 * which prints as junk.
 */
int main()
{
    int block_size = 3;   /* threads per block */
    int grid_size = 1;    /* blocks in the grid */
    intss *device_fb = 0;
    intss *host_fb = 0;
    int num_elems = block_size*grid_size;   /* one element per launched thread */
    int num_bytes_fb = num_elems*sizeof(intss);
    cudaError_t err;

    host_fb = (intss*)malloc(num_bytes_fb);
    if (host_fb == 0) {
        fprintf(stderr, "malloc of %d bytes failed\n", num_bytes_fb);
        return 1;
    }

    err = cudaMalloc((void **)&device_fb, num_bytes_fb);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        free(host_fb);
        return 1;
    }
    ....
    /* Execution configuration is <<<blocks in grid, threads per block>>>. */
    render2<<<grid_size,block_size>>>(device_fb, device_pixelspercore, samples, obj_list_flat_dev, numOpsPerCore, lnumdev, camdev, lightsdev, uranddev, iranddev);

    /* A launch returns no status: cudaGetLastError() reports a bad
       configuration, cudaDeviceSynchronize() reports faults raised while
       the kernel was running. */
    err = cudaGetLastError();
    if (err == cudaSuccess)
        err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "render2 failed: %s\n", cudaGetErrorString(err));
        cudaFree(device_fb);
        free(host_fb);
        return 1;
    }
    ....
    err = cudaMemcpy(host_fb, device_fb, num_bytes_fb, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy (device to host) failed: %s\n", cudaGetErrorString(err));
        cudaFree(device_fb);
        free(host_fb);
        return 1;
    }

    /* Exactly num_elems elements were allocated and copied back, so those
       are the only valid indices to read (0-2 with the sizes above).
       The field is unsigned, hence %u rather than %d. */
    for (int i = 0; i < num_elems; ++i)
        printf("output %u ", host_fb[i].one);

    cudaFree(device_fb);
    free(host_fb);
    return 0;
}
/*
 * render2 - each thread packs one colour value and stores it in
 * device_fb[index].one, where index is the thread's flat 1D global index.
 *
 * Launch layout: 1D grid of 1D blocks. There is no bounds guard, so the
 * total number of launched threads (gridDim.x * blockDim.x) must not exceed
 * the number of elements allocated for device_fb.
 *
 * Only the `one` field is written in the code shown here; `two` is left
 * untouched. r, g, b are produced by the elided computation; MIN and
 * RSHIFT/GSHIFT/BSHIFT are defined outside this snippet.
 */
__global__ void render2(intss *device_fb, struct parallelPixels *pixelsPerCore, int samples, double *obj_list_flat_dev, int numOpsPerCore, int lnumdev, struct camera camdev, struct vec3 *lightsdev, struct vec3 *uranddev, int *iranddev)
{
    /* Flat global thread index: block offset plus position inside the block. */
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    ....
    //computing data...
    /* Cap each channel at 1.0, scale by 255, keep the low 8 bits and shift
       the byte into its position within the 32-bit word. */
    device_fb[index].one = (((u_int32_t)(MIN(r, 1.0) * 255.0) & 0xff) << RSHIFT |
                            ((u_int32_t)(MIN(g, 1.0) * 255.0) & 0xff) << GSHIFT |
                            ((u_int32_t)(MIN(b, 1.0) * 255.0) & 0xff) << BSHIFT);
}