Hi,
The best thing to do is to read back all of the GPU memory with ReadBuffer and check the data.
If you have a very complicated kernel, you can also run the same computation on the CPU to see whether there is a problem.
// create buffers on device
cl_mem vol_a = clCreateBuffer(gpu_context, CL_MEM_WRITE_ONLY, mem_size, &a, &err);
shrCheckError(err, CL_SUCCESS);
cl_mem vol_b = clCreateBuffer(gpu_context, CL_MEM_WRITE_ONLY, mem_size, &b, &err);
shrCheckError(err, CL_SUCCESS);
cl_mem vol_c = clCreateBuffer(gpu_context, CL_MEM_READ_ONLY, mem_size, &c, &err);
shrCheckError(err, CL_SUCCESS);
There is a problem with these declarations.
You compute c = a + b.
So c should be CL_MEM_WRITE_ONLY, and a and b should be CL_MEM_READ_ONLY, not the other way round ;)
Moreover, rather than doing a WriteBuffer afterwards, it is simpler to copy the host data when the buffer is created, using CL_MEM_COPY_HOST_PTR:
cl_mem vol_a = clCreateBuffer(gpu_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, &a, &err);
shrCheckError(err, CL_SUCCESS);
cl_mem vol_b = clCreateBuffer(gpu_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, &b, &err);
shrCheckError(err, CL_SUCCESS);
cl_mem vol_c = clCreateBuffer(gpu_context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, &c, &err);
shrCheckError(err, CL_SUCCESS);
Last thing: why do you pass the addresses of a, b and c (&a, &b and &c) instead of a, b and c themselves?
This code runs:
cl_mem vol_a = clCreateBuffer(gpu_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, a, &err);
shrCheckError(err, CL_SUCCESS);
cl_mem vol_b = clCreateBuffer(gpu_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, b, &err);
shrCheckError(err, CL_SUCCESS);
cl_mem vol_c = clCreateBuffer(gpu_context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, c, &err);
shrCheckError(err, CL_SUCCESS);
The same thing applies to the ReadBuffer call.
Thanks
J