Runtime error involving cuSurfRefGetFormat

Depending on the order of my code, I get a runtime error.

I wanted to rearrange some bits of code so that they follow a consistent order. Both versions compile and run, but the second version hits a runtime error. Essentially, I swap lines 2 and 3 of the snippet (the two clEnqueueWriteBuffer calls) and I get a runtime error. The reason I want to swap them is that I wanted to organize the code by memory type (global, constant, texture, etc.). Why is there an error?

Version 1:

[codebox]

// Copy image data from host to device

// Both writes pass CL_FALSE as the blocking flag, so they are only enqueued
// here; the clFinish() call at the end is what waits for them to complete.
// NOTE(review): the host source is &kargs->matrix here but cbi->img (no '&')
// in the next call -- confirm kargs->matrix is an array/struct member and not
// itself a pointer, otherwise this copies from the pointer variable.
error = clEnqueueWriteBuffer(command_queue, c_dev_matrix, CL_FALSE, 0, matrix_size, &kargs->matrix, 0, NULL, &device_event_1);

// NOTE(review): dev_img is not declared in any snippet shown; the creation
// code only shows t_dev_img (an image object) -- confirm what dev_img is.
error |= clEnqueueWriteBuffer(command_queue, dev_img, CL_FALSE, 0, img_size, cbi->img, 0, NULL, &device_event_2);

// The return codes of both enqueue calls are OR-ed together and checked once.
oclCheckError(error, CL_SUCCESS);

// Wait for everything queued on command_queue before continuing.
clFinish(command_queue);

[/codebox]

Version 2:

[codebox]

// Copy image data from host to device

// Same two transfers as Version 1, with the image write enqueued first.
// Both writes pass CL_FALSE as the blocking flag, so they are only enqueued
// here; the clFinish() call at the end is what waits for them to complete.
// NOTE(review): dev_img is not declared in any snippet shown; the creation
// code only shows t_dev_img (an image object) -- confirm what dev_img is.
error = clEnqueueWriteBuffer(command_queue, dev_img, CL_FALSE, 0, img_size, cbi->img, 0, NULL, &device_event_2);

// NOTE(review): the host source is &kargs->matrix here but cbi->img (no '&')
// in the call above -- confirm kargs->matrix is an array/struct member and
// not itself a pointer, otherwise this copies from the pointer variable.
error |= clEnqueueWriteBuffer(command_queue, c_dev_matrix, CL_FALSE, 0, matrix_size, &kargs->matrix, 0, NULL, &device_event_1);

// The return codes of both enqueue calls are OR-ed together and checked once.
oclCheckError(error, CL_SUCCESS);

// Wait for everything queued on command_queue before continuing.
clFinish(command_queue);

[/codebox]

Setting of the arguments:

[codebox]

// Bind arguments 0-10 of fdk_kernel. Every argument is passed as a cl_mem
// handle; the return codes are OR-ed into `error` and checked once at the end.
// NOTE(review): the first call already uses |=, so `error` must hold a valid
// value (presumably CL_SUCCESS) from code not shown here -- confirm.
error |= clSetKernelArg(fdk_kernel, 0, sizeof(cl_mem), (void *) &g_dev_vol);

error |= clSetKernelArg(fdk_kernel, 1, sizeof(cl_mem), (void *) &c_img_dim);

error |= clSetKernelArg(fdk_kernel, 2, sizeof(cl_mem), (void *) &c_ic);

error |= clSetKernelArg(fdk_kernel, 3, sizeof(cl_mem), (void *) &c_nrm);

error |= clSetKernelArg(fdk_kernel, 4, sizeof(cl_mem), (void *) &c_sad);

error |= clSetKernelArg(fdk_kernel, 5, sizeof(cl_mem), (void *) &c_scale);

error |= clSetKernelArg(fdk_kernel, 6, sizeof(cl_mem), (void *) &c_vol_offset);

error |= clSetKernelArg(fdk_kernel, 7, sizeof(cl_mem), (void *) &c_vol_dim);

error |= clSetKernelArg(fdk_kernel, 8, sizeof(cl_mem), (void *) &c_vol_pix_spacing);

error |= clSetKernelArg(fdk_kernel, 9, sizeof(cl_mem), (void *) &c_dev_matrix);

// NOTE(review): t_dev_img is created with clCreateImage2D in the creation
// snippet, but the kernel signature shown declares its last parameter as
// `__global float *dev_img`. Binding an image object to a buffer-pointer
// parameter looks like a mismatch -- confirm which one is intended.
error |= clSetKernelArg(fdk_kernel, 10, sizeof(cl_mem), (void *) &t_dev_img);

oclCheckError(error, CL_SUCCESS);

[/codebox]

Relevant kernel code:

[codebox]

__kernel void kernel_fdk(

__global float *dev_vol, 

__constant int2 *img_dim, 

__constant float2 *ic, 

__constant float4 *nrm, 

__constant float *sad, 

__constant float *scale, 

__constant float4 *vol_offset, 

__constant int4 *vol_dim, 

__constant float4 *vol_pix_spacing, 

__constant float *dev_matrix,

__global float *dev_img

)

[/codebox]

Error:

[codebox]

007B7A58 (0x00000000 0x00000000 0x00000000 0x00000000), cuSurfRefGetFormat()+744

24 bytes(s)

[/codebox]

Thanks for the help!!!

I now have another similar error, and can’t seem to fix it by changing the order of memory exchange.

Based on the size of my texture, constant and global memory, I don’t think I should be going over any limits.

This is the new error, and below are the types and size of my buffers.

[codebox]

008D3A67 (0x00000000 0x00000000 0x00000000 0x00000000), cuSurfRefGetFormat()+580 55 bytes(s)

[/codebox]

[codebox]

// Allocate every device-side object used by the kernel: one read/write volume
// buffer, one 2D image, and a set of small read-only parameter buffers.
// Each creation call is followed immediately by its own oclCheckError().
// NOTE(review): every allocation requests CL_MEM_ALLOC_HOST_PTR (memory
// allocated from host-accessible memory) -- confirm this is intended for all
// of them, including the output volume.

// Create global memory buffers on device

cl_mem g_dev_vol = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, vol_size, NULL, &error);

oclCheckError(error, CL_SUCCESS);

// Create texture/image memory buffers on device

// img_origin, img_region and img_row_pitch are not used within this snippet;
// presumably they feed a later image transfer call -- confirm.
size_t img_origin[3] = {0, 0, 0};

size_t img_region[3] = {img_dim[0], img_dim[1], 1};

size_t img_row_pitch = img_dim[0] * sizeof(float);

// Single-channel (CL_R) image with float (CL_FLOAT) channel data.
cl_image_format img_format;

img_format.image_channel_order = CL_R;

img_format.image_channel_data_type = CL_FLOAT;

// Created with row pitch 0 and a NULL host pointer, i.e. no host data is
// supplied at creation time.
cl_mem t_dev_img = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, &img_format, img_dim[0], img_dim[1], 0, NULL, &error);

oclCheckError(error, CL_SUCCESS);

// Create constant memory buffers on device

// These are ordinary read-only buffers on the host side; the kernel signature
// shown is what places the matching parameters in the __constant address space.
cl_mem c_dev_matrix = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, matrix_size, NULL, &error);

oclCheckError(error, CL_SUCCESS);

cl_mem c_nrm = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(float4), NULL, &error);

oclCheckError(error, CL_SUCCESS);

cl_mem c_vol_offset = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(float4), NULL, &error);

oclCheckError(error, CL_SUCCESS);

cl_mem c_vol_pix_spacing = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(float4), NULL, &error);

oclCheckError(error, CL_SUCCESS);

cl_mem c_vol_dim = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(int4), NULL, &error);

oclCheckError(error, CL_SUCCESS);

cl_mem c_ic = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(float2), NULL, &error);

oclCheckError(error, CL_SUCCESS);

cl_mem c_img_dim = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(int2), NULL, &error);

oclCheckError(error, CL_SUCCESS);

cl_mem c_sad = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(float), NULL, &error);

oclCheckError(error, CL_SUCCESS);

cl_mem c_scale = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(float), NULL, &error);

oclCheckError(error, CL_SUCCESS);

[/codebox]

After doing some clean installs and still getting the error, I narrowed it down to a 32-bit vs. 64-bit issue. I was running 64-bit Windows 7 with the 64-bit driver, but I was building and running a 32-bit program. I guess there were issues with memory addresses or access between the two, although I can't understand why. All the errors went away after switching to purely 64-bit drivers and SDK.